Snap for 6435660 from 42134b99b59d452af33538ecd644bbf3771d5ec0 to sdk-release

Change-Id: I5a1ac2192179c387aecc8dfd09973685021dfac8
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..02890e7
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "adb4aa3ce437ba1978af540071f85e302cced3ec"
+  }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8f7a426
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+target
+Cargo.lock
+bench-log
+.*.swp
+wiki
+tags
+examples/debug.rs
+tmp/
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..14e8126
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,41 @@
+// This file is generated by cargo2android.py.
+
+rust_library_host_rlib {
+    name: "libregex",
+    crate_name: "regex",
+    srcs: ["src/lib.rs"],
+    edition: "2015",
+    features: [
+        "aho-corasick",
+        "default",
+        "memchr",
+        "perf",
+        "perf-cache",
+        "perf-dfa",
+        "perf-inline",
+        "perf-literal",
+        "std",
+        "thread_local",
+        "unicode",
+        "unicode-age",
+        "unicode-bool",
+        "unicode-case",
+        "unicode-gencat",
+        "unicode-perl",
+        "unicode-script",
+        "unicode-segment",
+    ],
+    rlibs: [
+        "libaho_corasick",
+        "libmemchr",
+        "libregex_syntax",
+        "libthread_local",
+    ],
+}
+
+// dependent_library ["feature_list"]
+//   aho-corasick-0.7.7 "default,std"
+//   lazy_static-1.4.0
+//   memchr-2.3.0 "default,std,use_std"
+//   regex-syntax-0.6.17 "unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
+//   thread_local-1.0.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..2db8688
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,834 @@
+1.3.7 (2020-04-17)
+==================
+This release contains a small bug fix that fixes how `regex` forwards crate
+features to `regex-syntax`. In particular, this will reduce recompilations in
+some cases.
+
+Bug fixes:
+
+* [BUG #665](https://github.com/rust-lang/regex/pull/665):
+  Fix feature forwarding to `regex-syntax`.
+
+
+1.3.6 (2020-03-24)
+==================
+This release contains a sizable (~30%) performance improvement when compiling
+some kinds of large regular expressions.
+
+Performance improvements:
+
+* [PERF #657](https://github.com/rust-lang/regex/pull/657):
+  Improve performance of compiling large regular expressions.
+
+
+1.3.5 (2020-03-12)
+==================
+This release updates this crate to Unicode 13.
+
+New features:
+
+* [FEATURE #653](https://github.com/rust-lang/regex/pull/653):
+  Update `regex-syntax` to Unicode 13.
+
+
+1.3.4 (2020-01-30)
+==================
+This is a small bug fix release that fixes a bug related to the scoping of
+flags in a regex. Namely, before this fix, a regex like `((?i)a)b` would
+match `aB` despite the fact that `b` should not be matched case insensitively.
+
+Bug fixes:
+
+* [BUG #640](https://github.com/rust-lang/regex/issues/640):
+  Fix bug related to the scoping of flags in a regex.
+
+
+1.3.3 (2020-01-09)
+==================
+This is a small maintenance release that upgrades the dependency on
+`thread_local` from `0.3` to `1.0`. The minimum supported Rust version remains
+at Rust 1.28.
+
+
+1.3.2 (2020-01-09)
+==================
+This is a small maintenance release with some house cleaning and bug fixes.
+
+New features:
+
+* [FEATURE #631](https://github.com/rust-lang/regex/issues/631):
+  Add a `Match::range` method and a `From<Match> for Range` impl.
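+
+As a quick illustration of the new API (a minimal sketch written for this
+note, not taken from the crate's documentation), both forms below yield the
+same half-open byte range:
+
+    use regex::Regex;
+
+    fn main() {
+        let re = Regex::new(r"\d+").unwrap();
+        let m = re.find("item 42").unwrap();
+        // `Match::range` and the `From<Match>` impl both yield 5..7 here.
+        assert_eq!(m.range(), 5..7);
+        let r: std::ops::Range<usize> = m.into();
+        assert_eq!(&"item 42"[r], "42");
+    }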
+
+Bug fixes:
+
+* [BUG #521](https://github.com/rust-lang/regex/issues/521):
+  Corrects `/-/.splitn("a", 2)` to return `["a"]` instead of `["a", ""]`.
+* [BUG #594](https://github.com/rust-lang/regex/pull/594):
+  Improve error reporting when writing `\p\`.
+* [BUG #627](https://github.com/rust-lang/regex/issues/627):
+  Corrects `/-/.split("a-")` to return `["a", ""]` instead of `["a"]`.
+* [BUG #633](https://github.com/rust-lang/regex/pull/633):
+  Squash deprecation warnings for the `std::error::Error::description` method.
+
+
+1.3.1 (2019-09-04)
+==================
+This is a maintenance release with no changes in order to try to work around
+a [docs.rs/Cargo issue](https://github.com/rust-lang/docs.rs/issues/400).
+
+
+1.3.0 (2019-09-03)
+==================
+This release adds a plethora of new crate features that permit users of regex
+to shrink its size considerably, in exchange for giving up either functionality
+(such as Unicode support) or runtime performance. When all such features are
+disabled, the dependency tree for `regex` shrinks to exactly 1 crate
+(`regex-syntax`). More information about the new crate features can be
+[found in the docs](https://docs.rs/regex/*/#crate-features).
+
+Note that while this is a new minor version release, the minimum supported
+Rust version for this crate remains at `1.28.0`.
+
+New features:
+
+* [FEATURE #474](https://github.com/rust-lang/regex/issues/474):
+  The `use_std` feature has been deprecated in favor of the `std` feature.
+  The `use_std` feature will be removed in regex 2. Until then, `use_std` will
+  remain as an alias for the `std` feature.
+* [FEATURE #583](https://github.com/rust-lang/regex/issues/583):
+  Add a substantial number of crate features for shrinking `regex`.
+
+
+1.2.1 (2019-08-03)
+==================
+This release does a bit of house cleaning. Namely:
+
+* This repository is now using rustfmt.
+* License headers have been removed from all files, in following suit with the
+  Rust project.
+* Teddy has been removed from the `regex` crate, and is now part of the
+  `aho-corasick` crate.
+  [See `aho-corasick`'s new `packed` sub-module for details](https://docs.rs/aho-corasick/0.7.6/aho_corasick/packed/index.html).
+* The `utf8-ranges` crate has been deprecated, with its functionality moving
+  into the
+  [`utf8` sub-module of `regex-syntax`](https://docs.rs/regex-syntax/0.6.11/regex_syntax/utf8/index.html).
+* The `ucd-util` dependency has been dropped, in favor of implementing what
+  little we need inside of `regex-syntax` itself.
+
+In general, this is part of an ongoing (long term) effort to make optimizations
+in the regex engine easier to reason about. The current code is too convoluted
+and thus it is very easy to introduce new bugs. This simplification effort is
+the primary motivation behind re-working the `aho-corasick` crate to not only
+bundle algorithms like Teddy, but to also provide regex-like match semantics
+automatically.
+
+Moving forward, the plan is to join up with the `bstr` and `regex-automata`
+crates, with the former providing more sophisticated substring search
+algorithms (thereby deleting existing code in `regex`) and the latter providing
+ahead-of-time compiled DFAs for cases where they are inexpensive to compute.
+
+
+1.2.0 (2019-07-20)
+==================
+This release updates regex's minimum supported Rust version to 1.28, which was
+released almost 1 year ago. This release also updates regex's Unicode data
+tables to 12.1.0.
+
+
+1.1.9 (2019-07-06)
+==================
+This release contains a bug fix that caused regex's tests to fail, due to a
+dependency on an unreleased behavior in regex-syntax.
+
+* [BUG #593](https://github.com/rust-lang/regex/issues/593):
+  Move an integration-style test on error messages into regex-syntax.
+
+
+1.1.8 (2019-07-04)
+==================
+This release contains a few small internal refactorings. One of which fixes
+an instance of undefined behavior in a part of the SIMD code.
+
+Bug fixes:
+
+* [BUG #545](https://github.com/rust-lang/regex/issues/545):
+  Improves error messages when a repetition operator is used without a number.
+* [BUG #588](https://github.com/rust-lang/regex/issues/588):
+  Removes use of a repr(Rust) union used for type punning in the Teddy matcher.
+* [BUG #591](https://github.com/rust-lang/regex/issues/591):
+  Update docs for running benchmarks and improve failure modes.
+
+
+1.1.7 (2019-06-09)
+==================
+This release fixes up a few warnings as a result of recent deprecations.
+
+
+1.1.6 (2019-04-16)
+==================
+This release fixes a regression introduced by a bug fix (for
+[BUG #557](https://github.com/rust-lang/regex/issues/557)) which could cause
+the regex engine to enter an infinite loop. This bug was originally
+[reported against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1247).
+
+
+1.1.5 (2019-04-01)
+==================
+This release fixes a bug in regex's dependency specification where it requires
+a newer version of regex-syntax, but this wasn't communicated correctly in the
+Cargo.toml. This would have been caught by a minimal version check, but this
+check was disabled because the `rand` crate itself advertises incorrect
+dependency specifications.
+
+Bug fixes:
+
+* [BUG #570](https://github.com/rust-lang/regex/pull/570):
+  Fix regex-syntax minimal version.
+
+
+1.1.4 (2019-03-31)
+==================
+This release fixes a backwards compatibility regression where Regex was no
+longer UnwindSafe. This was caused by the upgrade to aho-corasick 0.7, whose
+AhoCorasick type was itself not UnwindSafe. This has been fixed in aho-corasick
+0.7.4, which we now require.
+
+Bug fixes:
+
+* [BUG #568](https://github.com/rust-lang/regex/pull/568):
+  Fix an API regression where Regex was no longer UnwindSafe.
+
+
+1.1.3 (2019-03-30)
+==================
+This release fixes a few bugs and adds a performance improvement when a regex
+is a simple alternation of literals.
+
+Performance improvements:
+
+* [OPT #566](https://github.com/rust-lang/regex/pull/566):
+  Upgrades `aho-corasick` to 0.7 and uses it for `foo|bar|...|quux` regexes.
+
+Bug fixes:
+
+* [BUG #527](https://github.com/rust-lang/regex/issues/527):
+  Fix a bug where the parser would panic on patterns like `((?x))`.
+* [BUG #555](https://github.com/rust-lang/regex/issues/555):
+  Fix a bug where the parser would panic on patterns like `(?m){1,1}`.
+* [BUG #557](https://github.com/rust-lang/regex/issues/557):
+  Fix a bug where captures could lead to an incorrect match.
+
+
+1.1.2 (2019-02-27)
+==================
+This release fixes a bug found in the fix introduced in 1.1.1.
+
+Bug fixes:
+
+* [BUG edf45e6f](https://github.com/rust-lang/regex/commit/edf45e6f):
+  Fix bug introduced in reverse suffix literal matcher in the 1.1.1 release.
+
+
+1.1.1 (2019-02-27)
+==================
+This is a small release with one fix for a bug caused by literal optimizations.
+
+Bug fixes:
+
+* [BUG 661bf53d](https://github.com/rust-lang/regex/commit/661bf53d):
+  Fixes a bug in the reverse suffix literal optimization. This was originally
+  reported
+  [against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1203).
+
+
+1.1.0 (2018-11-30)
+==================
+This is a small release with a couple small enhancements. This release also
+increases the minimal supported Rust version (MSRV) to 1.24.1 (from 1.20.0). In
+accordance with this crate's MSRV policy, this release bumps the minor version
+number.
+
+Performance improvements:
+
+* [OPT #511](https://github.com/rust-lang/regex/pull/511),
+  [OPT #540](https://github.com/rust-lang/regex/pull/540):
+  Improve lazy DFA construction for large regex sets.
+
+New features:
+
+* [FEATURE #538](https://github.com/rust-lang/regex/pull/538):
+  Add Emoji and "break" Unicode properties. See [UNICODE.md](UNICODE.md).
+
+Bug fixes:
+
+* [BUG #530](https://github.com/rust-lang/regex/pull/530):
+  Add Unicode license (for data tables).
+* Various typo/doc fixups.
+
+
+1.0.6 (2018-11-06)
+==================
+This is a small release.
+
+Performance improvements:
+
+* [OPT #513](https://github.com/rust-lang/regex/pull/513):
+  Improve performance of compiling large Unicode classes by 8-10%.
+
+Bug fixes:
+
+* [BUG #533](https://github.com/rust-lang/regex/issues/533):
+  Fix definition of `[[:blank:]]` class that regressed in `regex-syntax 0.5`.
+
+
+1.0.5 (2018-09-06)
+==================
+This is a small release with an API enhancement.
+
+New features:
+
+* [FEATURE #509](https://github.com/rust-lang/regex/pull/509):
+  Generalize impls of the `Replacer` trait.
+
+
+1.0.4 (2018-08-25)
+==================
+This is a small release that bumps the quickcheck dependency.
+
+
+1.0.3 (2018-08-24)
+==================
+This is a small bug fix release.
+
+Bug fixes:
+
+* [BUG #504](https://github.com/rust-lang/regex/pull/504):
+  Fix for Cargo's "minimal version" support.
+* [BUG 1e39165f](https://github.com/rust-lang/regex/commit/1e39165f):
+  Fix doc examples for byte regexes.
+
+
+1.0.2 (2018-07-18)
+==================
+This release exposes some new lower level APIs on `Regex` that permit
+amortizing allocation and controlling the location at which a search is
+performed in a more granular way. Most users of the regex crate will not
+need or want to use these APIs.
+
+New features:
+
+* [FEATURE #493](https://github.com/rust-lang/regex/pull/493):
+  Add a few lower level APIs for amortizing allocation and more fine grained
+  searching.
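+
+The additions include, among others, `Regex::capture_locations` and
+`Regex::captures_read`. A minimal sketch of the reuse pattern they enable
+(see the crate documentation for the exact set of new methods):
+
+    use regex::Regex;
+
+    fn main() {
+        let re = Regex::new(r"(\d{4})-(\d{2})").unwrap();
+        // Allocate the capture slots once...
+        let mut locs = re.capture_locations();
+        // ...and reuse them for subsequent searches instead of allocating a
+        // fresh `Captures` value each time.
+        let m = re.captures_read(&mut locs, "2018-07-18").unwrap();
+        assert_eq!(m.as_str(), "2018-07");
+        assert_eq!(locs.get(1), Some((0, 4)));
+        assert_eq!(locs.get(2), Some((5, 7)));
+    }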
+
+Bug fixes:
+
+* [BUG 3981d2ad](https://github.com/rust-lang/regex/commit/3981d2ad):
+  Correct outdated documentation on `RegexBuilder::dot_matches_new_line`.
+* [BUG 7ebe4ae0](https://github.com/rust-lang/regex/commit/7ebe4ae0):
+  Correct outdated documentation on `Parser::allow_invalid_utf8` in the
+  `regex-syntax` crate.
+* [BUG 24c7770b](https://github.com/rust-lang/regex/commit/24c7770b):
+  Fix a bug in the HIR printer where it wouldn't correctly escape meta
+  characters in character classes.
+
+
+1.0.1 (2018-06-19)
+==================
+This release upgrades regex's Unicode tables to Unicode 11, and enables SIMD
+optimizations automatically on Rust stable (1.27 or newer).
+
+New features:
+
+* [FEATURE #486](https://github.com/rust-lang/regex/pull/486):
+  Implement `size_hint` on `RegexSet` match iterators.
+* [FEATURE #488](https://github.com/rust-lang/regex/pull/488):
+  Update Unicode tables for Unicode 11.
+* [FEATURE #490](https://github.com/rust-lang/regex/pull/490):
+  SIMD optimizations are now enabled automatically in Rust stable, for versions
+  1.27 and up. No compilation flags or features need to be set. CPU support
+  for SIMD is detected automatically at runtime.
+
+Bug fixes:
+
+* [BUG #482](https://github.com/rust-lang/regex/pull/482):
+  Present a better compilation error when the `use_std` feature isn't used.
+
+
+1.0.0 (2018-05-01)
+==================
+This release marks the 1.0 release of regex.
+
+While this release includes some breaking changes, most users of older versions
+of the regex library should be able to migrate to 1.0 by simply bumping the
+version number. The important changes are as follows:
+
+* We adopt Rust 1.20 as the new minimum supported version of Rust for regex.
+  We also tentatively adopt a policy that permits bumping the minimum supported
+  version of Rust in minor version releases of regex, but no patch releases.
+  That is, with respect to semver, we do not strictly consider bumping the
+  minimum version of Rust to be a breaking change, but adopt a conservative
+  stance as a compromise.
+* Octal syntax in regular expressions has been disabled by default. This
+  permits better error messages that inform users that backreferences aren't
+  available. Octal syntax can be re-enabled via the corresponding option on
+  `RegexBuilder` (a short example follows this list).
+* `(?-u:\B)` is no longer allowed in Unicode regexes since it can match at
+  invalid UTF-8 code unit boundaries. `(?-u:\b)` is still allowed in Unicode
+  regexes.
+* The `From<regex_syntax::Error>` impl has been removed. This formally removes
+  the public dependency on `regex-syntax`.
+* A new feature, `use_std`, has been added and enabled by default. Disabling
+  the feature will result in a compilation error. In the future, this may
+  permit us to support `no_std` environments (w/ `alloc`) in a backwards
+  compatible way.
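+
+As a short sketch of the octal change mentioned above (written for this note,
+not taken from the docs):
+
+    use regex::{Regex, RegexBuilder};
+
+    fn main() {
+        // With octal syntax disabled (the default), `\141` is rejected and the
+        // error explains that backreferences are not supported.
+        assert!(Regex::new(r"\141").is_err());
+        // Octal syntax can be re-enabled explicitly via the builder.
+        let re = RegexBuilder::new(r"\141").octal(true).build().unwrap();
+        assert!(re.is_match("a")); // octal 141 is 'a'
+    }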
+
+For more information and discussion, please see
+[1.0 release tracking issue](https://github.com/rust-lang/regex/issues/457).
+
+
+0.2.11 (2018-05-01)
+===================
+This release primarily contains bug fixes. Some of them resolve bugs where
+the parser could panic.
+
+New features:
+
+* [FEATURE #459](https://github.com/rust-lang/regex/pull/459):
+  Include C++'s standard regex library and Boost's regex library in the
+  benchmark harness. We now include D/libphobos, C++/std, C++/boost, Oniguruma,
+  PCRE1, PCRE2, RE2 and Tcl in the harness.
+
+Bug fixes:
+
+* [BUG #445](https://github.com/rust-lang/regex/issues/445):
+  Clarify order of indices returned by RegexSet match iterator.
+* [BUG #461](https://github.com/rust-lang/regex/issues/461):
+  Improve error messages for invalid regexes like `[\d-a]`.
+* [BUG #464](https://github.com/rust-lang/regex/issues/464):
+  Fix a bug in the error message pretty printer that could cause a panic when
+  a regex contained a literal `\n` character.
+* [BUG #465](https://github.com/rust-lang/regex/issues/465):
+  Fix a panic in the parser that was caused by applying a repetition operator
+  to `(?flags)`.
+* [BUG #466](https://github.com/rust-lang/regex/issues/466):
+  Fix a bug where `\pC` was not recognized as an alias for `\p{Other}`.
+* [BUG #470](https://github.com/rust-lang/regex/pull/470):
+  Fix a bug where literal searches did more work than necessary for anchored
+  regexes.
+
+
+0.2.10 (2018-03-16)
+===================
+This release primarily updates the regex crate to changes made in `std::arch`
+on nightly Rust.
+
+New features:
+
+* [FEATURE #458](https://github.com/rust-lang/regex/pull/458):
+  The `Hir` type in `regex-syntax` now has a printer.
+
+
+0.2.9 (2018-03-12)
+==================
+This release introduces a new nightly only feature, `unstable`, which enables
+SIMD optimizations for certain types of regexes. No additional compile time
+options are necessary, and the regex crate will automatically choose the
+best CPU features at run time. As a result, the `simd` (nightly only) crate
+dependency has been dropped.
+
+New features:
+
+* [FEATURE #456](https://github.com/rust-lang/regex/pull/456):
+  The regex crate now includes AVX2 optimizations in addition to the extant
+  SSSE3 optimization.
+
+Bug fixes:
+
+* [BUG #455](https://github.com/rust-lang/regex/pull/455):
+  Fix a bug where `(?x)[ / - ]` failed to parse.
+
+
+0.2.8 (2018-03-12)
+==================
+Bug fixes:
+
+* [BUG #454](https://github.com/rust-lang/regex/pull/454):
+  Fix a bug in the nest limit checker being too aggressive.
+
+
+0.2.7 (2018-03-07)
+==================
+This release includes a ground-up rewrite of the regex-syntax crate, which has
+been in development for over a year.
+
+New features:
+
+* Error messages for invalid regexes have been greatly improved. You get these
+  automatically; you don't need to do anything. In addition to better
+  formatting, error messages will now explicitly call out the use of look
+  around. When regex 1.0 is released, this will happen for backreferences as
+  well.
+* Full support for intersection, difference and symmetric difference of
+  character classes. These can be used via the `&&`, `--` and `~~` binary
+  operators within classes.
+* A Unicode Level 1 conformant implementation of `\p{..}` character classes.
+  Things like `\p{scx:Hira}`, `\p{age:3.2}` or `\p{Changes_When_Casefolded}`
+  now work. All property name and value aliases are supported, and properties
+  are selected via loose matching. e.g., `\p{Greek}` is the same as
+  `\p{G r E e K}`.
+* A new `UNICODE.md` document has been added to this repository that
+  exhaustively documents support for UTS#18.
+* Empty sub-expressions are now permitted in most places. That is, `()+` is
+  now a valid regex.
+* Almost everything in regex-syntax now uses constant stack space, even when
+  performing analysis that requires structural induction. This reduces the risk
+  of a user provided regular expression causing a stack overflow.
+* [FEATURE #174](https://github.com/rust-lang/regex/issues/174):
+  The `Ast` type in `regex-syntax` now contains span information.
+* [FEATURE #424](https://github.com/rust-lang/regex/issues/424):
+  Support `\u`, `\u{...}`, `\U` and `\U{...}` syntax for specifying code points
+  in a regular expression.
+* [FEATURE #449](https://github.com/rust-lang/regex/pull/449):
+  Add a `Replace::by_ref` adapter for use of a replacer without consuming it.
+
+Bug fixes:
+
+* [BUG #446](https://github.com/rust-lang/regex/issues/446):
+  We re-enable the Boyer-Moore literal matcher.
+
+
+0.2.6 (2018-02-08)
+==================
+Bug fixes:
+
+* [BUG #446](https://github.com/rust-lang/regex/issues/446):
+  Fixes a bug in the new Boyer-Moore searcher that results in a match failure.
+  We fix this bug by temporarily disabling Boyer-Moore.
+
+
+0.2.5 (2017-12-30)
+==================
+Bug fixes:
+
+* [BUG #437](https://github.com/rust-lang/regex/issues/437):
+  Fixes a bug in the new Boyer-Moore searcher that results in a panic.
+
+
+0.2.4 (2017-12-30)
+==================
+New features:
+
+* [FEATURE #348](https://github.com/rust-lang/regex/pull/348):
+  Improve performance for capture searches on anchored regex.
+  (Contributed by @ethanpailes. Nice work!)
+* [FEATURE #419](https://github.com/rust-lang/regex/pull/419):
+  Expand literal searching to include Tuned Boyer-Moore in some cases.
+  (Contributed by @ethanpailes. Nice work!)
+
+Bug fixes:
+
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  The regex compiler plugin has been removed.
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  `simd` has been bumped to `0.2.1`, which fixes a Rust nightly build error.
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  Bring the benchmark harness up to date.
+
+
+0.2.3 (2017-11-30)
+==================
+New features:
+
+* [FEATURE #374](https://github.com/rust-lang/regex/pull/374):
+  Add `impl From<Match> for &str`.
+* [FEATURE #380](https://github.com/rust-lang/regex/pull/380):
+  Derive `Clone` and `PartialEq` on `Error`.
+* [FEATURE #400](https://github.com/rust-lang/regex/pull/400):
+  Update to Unicode 10.
+
+Bug fixes:
+
+* [BUG #375](https://github.com/rust-lang/regex/issues/375):
+  Fix a bug that prevented the bounded backtracker from terminating.
+* [BUG #393](https://github.com/rust-lang/regex/issues/393),
+  [BUG #394](https://github.com/rust-lang/regex/issues/394):
+  Fix bug with `replace` methods for empty matches.
+
+
+0.2.2 (2017-05-21)
+==================
+New features:
+
+* [FEATURE #341](https://github.com/rust-lang/regex/issues/341):
+  Support nested character classes and intersection operation.
+  For example, `[\p{Greek}&&\pL]` matches Greek letters and
+  `[[0-9]&&[^4]]` matches every decimal digit except `4`.
+  (Much thanks to @robinst, who contributed this awesome feature.)
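+
+Two small examples of the new class syntax (sketches written for this note):
+
+    use regex::Regex;
+
+    fn main() {
+        // Intersection: any decimal digit except `4`.
+        let except_four = Regex::new(r"^[[0-9]&&[^4]]$").unwrap();
+        assert!(except_four.is_match("7"));
+        assert!(!except_four.is_match("4"));
+
+        // Nested classes with Unicode properties: Greek letters only.
+        let greek = Regex::new(r"^[\p{Greek}&&\pL]+$").unwrap();
+        assert!(greek.is_match("λόγος"));
+        assert!(!greek.is_match("logos"));
+    }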
+
+Bug fixes:
+
+* [BUG #321](https://github.com/rust-lang/regex/issues/321):
+  Fix bug in literal extraction and UTF-8 decoding.
+* [BUG #326](https://github.com/rust-lang/regex/issues/326):
+  Add documentation tip about the `(?x)` flag.
+* [BUG #333](https://github.com/rust-lang/regex/issues/333):
+  Show additional replacement example using curly braces.
+* [BUG #334](https://github.com/rust-lang/regex/issues/334):
+  Fix bug when resolving captures after a match.
+* [BUG #338](https://github.com/rust-lang/regex/issues/338):
+  Add example that uses `Captures::get` to API documentation.
+* [BUG #353](https://github.com/rust-lang/regex/issues/353):
+  Fix RegexSet bug that caused match failure in some cases.
+* [BUG #354](https://github.com/rust-lang/regex/pull/354):
+  Fix panic in parser when `(?x)` is used.
+* [BUG #358](https://github.com/rust-lang/regex/issues/358):
+  Fix literal optimization bug with RegexSet.
+* [BUG #359](https://github.com/rust-lang/regex/issues/359):
+  Fix example code in README.
+* [BUG #365](https://github.com/rust-lang/regex/pull/365):
+  Fix bug in `rure_captures_len` in the C binding.
+* [BUG #367](https://github.com/rust-lang/regex/issues/367):
+  Fix byte class bug that caused a panic.
+
+
+0.2.1
+=====
+One major bug with `replace_all` has been fixed along with a couple of other
+touchups.
+
+* [BUG #312](https://github.com/rust-lang/regex/issues/312):
+  Fix documentation for `NoExpand` to reference correct lifetime parameter.
+* [BUG #314](https://github.com/rust-lang/regex/issues/314):
+  Fix a bug with `replace_all` when replacing a match with the empty string.
+* [BUG #316](https://github.com/rust-lang/regex/issues/316):
+  Note a missing breaking change from the `0.2.0` CHANGELOG entry.
+  (`RegexBuilder::compile` was renamed to `RegexBuilder::build`.)
+* [BUG #324](https://github.com/rust-lang/regex/issues/324):
+  Compiling `regex` should only require one version of `memchr` crate.
+
+
+0.2.0
+=====
+This is a new major release of the regex crate, and is an implementation of the
+[regex 1.0 RFC](https://github.com/rust-lang/rfcs/blob/master/text/1620-regex-1.0.md).
+We are releasing a `0.2` first, and if there are no major problems, we will
+release a `1.0` shortly. For `0.2`, the minimum *supported* Rust version is
+1.12.
+
+There are a number of **breaking changes** in `0.2`. They are split into two
+types. The first type corresponds to breaking changes in regular expression
+syntax. The second type corresponds to breaking changes in the API.
+
+Breaking changes for regex syntax:
+
+* POSIX character classes now require double bracketing. Previously, the regex
+  `[:upper:]` would parse as the `upper` POSIX character class. Now it parses
+  as the character class containing the characters `:upper:`. The fix to this
+  change is to use `[[:upper:]]` instead. Note that variants like
+  `[[:upper:][:blank:]]` continue to work.
+* The character `[` must always be escaped inside a character class.
+* The characters `&`, `-` and `~` must be escaped if any one of them are
+  repeated consecutively. For example, `[&]`, `[\&]`, `[\&\&]`, `[&-&]` are all
+  equivalent while `[&&]` is illegal. (The motivation for this and the prior
+  change is to provide a backwards compatible path for adding character class
+  set notation.)
+* A `bytes::Regex` now has Unicode mode enabled by default (like the main
+  `Regex` type). This means regexes compiled with `bytes::Regex::new` that
+  don't have the Unicode flag set should add `(?-u)` to recover the original
+  behavior.
+
+Breaking changes for the regex API:
+
+* `find` and `find_iter` now **return `Match` values instead of
+  `(usize, usize)`.** `Match` values have `start` and `end` methods, which
+  return the match offsets. `Match` values also have an `as_str` method,
+  which returns the text of the match itself.
+* The `Captures` type now only provides a single iterator over all capturing
+  matches, which should replace uses of `iter` and `iter_pos`. Uses of
+  `iter_named` should use the `capture_names` method on `Regex`.
+* The `at` method on the `Captures` type has been renamed to `get`, and it
+  now returns a `Match`. Similarly, the `name` method on `Captures` now returns
+  a `Match`.
+* The `replace` methods now return `Cow` values. The `Cow::Borrowed` variant
+  is returned when no replacements are made.
+* The `Replacer` trait has been completely overhauled. This should only
+  impact clients that implement this trait explicitly. Standard uses of
+  the `replace` methods should continue to work unchanged. If you implement
+  the `Replacer` trait, please consult the new documentation.
+* The `quote` free function has been renamed to `escape`.
+* The `Regex::with_size_limit` method has been removed. It is replaced by
+  `RegexBuilder::size_limit`.
+* The `RegexBuilder` type has switched from owned `self` method receivers to
+  `&mut self` method receivers. Most uses will continue to work unchanged, but
+  some code may require naming an intermediate variable to hold the builder.
+* The `compile` method on `RegexBuilder` has been renamed to `build`.
+* The free `is_match` function has been removed. It is replaced by compiling
+  a `Regex` and calling its `is_match` method.
+* The `PartialEq` and `Eq` impls on `Regex` have been dropped. If you relied
+  on these impls, the fix is to define a wrapper type around `Regex`, impl
+  `Deref` on it and provide the necessary impls.
+* The `is_empty` method on `Captures` has been removed. This always returns
+  `false`, so its use is superfluous.
+* The `Syntax` variant of the `Error` type now contains a string instead of
+  a `regex_syntax::Error`. If you were examining syntax errors more closely,
+  you'll need to explicitly use the `regex_syntax` crate to re-parse the regex.
+* The `InvalidSet` variant of the `Error` type has been removed since it is
+  no longer used.
+* Most of the iterator types have been renamed to match conventions. If you
+  were using these iterator types explicitly, please consult the documentation
+  for its new name. For example, `RegexSplits` has been renamed to `Split`.
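+
+To make the new shapes concrete, here is a hedged sketch (written for this
+note) exercising the `Match`, `Captures::get` and `Cow`-returning `replace`
+changes listed above:
+
+    use regex::Regex;
+    use std::borrow::Cow;
+
+    fn main() {
+        let re = Regex::new(r"(\w+)@(\w+)\.com").unwrap();
+        let text = "contact: alice@example.com";
+
+        // `find` now returns a `Match` carrying offsets and the matched text.
+        let m = re.find(text).unwrap();
+        assert_eq!((m.start(), m.end()), (9, 26));
+        assert_eq!(m.as_str(), "alice@example.com");
+
+        // `Captures::get` (formerly `at`) returns an `Option<Match>`.
+        let caps = re.captures(text).unwrap();
+        assert_eq!(caps.get(1).unwrap().as_str(), "alice");
+
+        // `replace` returns a `Cow`, borrowed when no replacement was made.
+        let untouched = re.replace("no address here", "$1");
+        assert!(matches!(untouched, Cow::Borrowed(_)));
+    }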
+
+A number of bugs have been fixed:
+
+* [BUG #151](https://github.com/rust-lang/regex/issues/151):
+  The `Replacer` trait has been changed to permit the caller to control
+  allocation.
+* [BUG #165](https://github.com/rust-lang/regex/issues/165):
+  Remove the free `is_match` function.
+* [BUG #166](https://github.com/rust-lang/regex/issues/166):
+  Expose more knobs (available in `0.1`) and remove `with_size_limit`.
+* [BUG #168](https://github.com/rust-lang/regex/issues/168):
+  Iterators produced by `Captures` now have the correct lifetime parameters.
+* [BUG #175](https://github.com/rust-lang/regex/issues/175):
+  Fix a corner case in the parsing of POSIX character classes.
+* [BUG #178](https://github.com/rust-lang/regex/issues/178):
+  Drop the `PartialEq` and `Eq` impls on `Regex`.
+* [BUG #179](https://github.com/rust-lang/regex/issues/179):
+  Remove `is_empty` from `Captures` since it always returns false.
+* [BUG #276](https://github.com/rust-lang/regex/issues/276):
+  Position of named capture can now be retrieved from a `Captures`.
+* [BUG #296](https://github.com/rust-lang/regex/issues/296):
+  Remove winapi/kernel32-sys dependency on UNIX.
+* [BUG #307](https://github.com/rust-lang/regex/issues/307):
+  Fix error on emscripten.
+
+
+0.1.80
+======
+* [PR #292](https://github.com/rust-lang/regex/pull/292):
+  Fixes bug #291, which was introduced by PR #290.
+
+0.1.79
+======
+* Require regex-syntax 0.3.8.
+
+0.1.78
+======
+* [PR #290](https://github.com/rust-lang/regex/pull/290):
+  Fixes bug #289, which caused some regexes with a certain combination
+  of literals to match incorrectly.
+
+0.1.77
+======
+* [PR #281](https://github.com/rust-lang/regex/pull/281):
+  Fixes bug #280 by disabling all literal optimizations when a pattern
+  is partially anchored.
+
+0.1.76
+======
+* Tweak criteria for using the Teddy literal matcher.
+
+0.1.75
+======
+* [PR #275](https://github.com/rust-lang/regex/pull/275):
+  Improves match verification performance in the Teddy SIMD searcher.
+* [PR #278](https://github.com/rust-lang/regex/pull/278):
+  Replaces slow substring loop in the Teddy SIMD searcher with Aho-Corasick.
+* Implemented DoubleEndedIterator on regex set match iterators.
+
+0.1.74
+======
+* Release regex-syntax 0.3.5 with a minor bug fix.
+* Fix bug #272.
+* Fix bug #277.
+* [PR #270](https://github.com/rust-lang/regex/pull/270):
+  Fixes bugs #264, #268 and an unreported bug where the DFA cache size could
+  be drastically underestimated in some cases (leading to high unexpected memory
+  usage).
+
+0.1.73
+======
+* Release `regex-syntax 0.3.4`.
+* Bump `regex-syntax` dependency version for `regex` to `0.3.4`.
+
+0.1.72
+======
+* [PR #262](https://github.com/rust-lang/regex/pull/262):
+  Fixes a number of small bugs caught by fuzz testing (AFL).
+
+0.1.71
+======
+* [PR #236](https://github.com/rust-lang/regex/pull/236):
+  Fix a bug in how suffix literals were extracted, which could lead
+  to invalid match behavior in some cases.
+
+0.1.70
+======
+* [PR #231](https://github.com/rust-lang/regex/pull/231):
+  Add SIMD accelerated multiple pattern search.
+* [PR #228](https://github.com/rust-lang/regex/pull/228):
+  Reintroduce the reverse suffix literal optimization.
+* [PR #226](https://github.com/rust-lang/regex/pull/226):
+  Implements NFA state compression in the lazy DFA.
+* [PR #223](https://github.com/rust-lang/regex/pull/223):
+  A fully anchored RegexSet can now short-circuit.
+
+0.1.69
+======
+* [PR #216](https://github.com/rust-lang/regex/pull/216):
+  Tweak the threshold for running backtracking.
+* [PR #217](https://github.com/rust-lang/regex/pull/217):
+  Add upper limit (from the DFA) to capture search (for the NFA).
+* [PR #218](https://github.com/rust-lang/regex/pull/218):
+  Add rure, a C API.
+
+0.1.68
+======
+* [PR #210](https://github.com/rust-lang/regex/pull/210):
+  Fixed a performance bug in `bytes::Regex::replace` where `extend` was used
+  instead of `extend_from_slice`.
+* [PR #211](https://github.com/rust-lang/regex/pull/211):
+  Fixed a bug in the handling of word boundaries in the DFA.
+* [PR #213](https://github.com/rust-lang/regex/pull/213):
+  Added RE2 and Tcl to the benchmark harness. Also added a CLI utility for
+  running regexes using any of the following regex engines: PCRE1, PCRE2,
+  Oniguruma, RE2, Tcl and of course Rust's own regexes.
+
+0.1.67
+======
+* [PR #201](https://github.com/rust-lang/regex/pull/201):
+  Fix undefined behavior in the `regex!` compiler plugin macro.
+* [PR #205](https://github.com/rust-lang/regex/pull/205):
+  More improvements to DFA performance. Competitive with RE2. See PR for
+  benchmarks.
+* [PR #209](https://github.com/rust-lang/regex/pull/209):
+  Release 0.1.66 was semver incompatible since it required a newer version
+  of Rust than previous releases. This PR fixes that. (And `0.1.66` was
+  yanked.)
+
+0.1.66
+======
+* Speculative support for Unicode word boundaries was added to the DFA. This
+  should remove the last common case that disqualified use of the DFA.
+* An optimization that scanned for suffix literals and then matched the regular
+  expression in reverse was removed because it had worst case quadratic time
+  complexity. It was replaced with a more limited optimization where, given any
+  regex of the form `re$`, it will be matched in reverse from the end of the
+  haystack.
+* [PR #202](https://github.com/rust-lang/regex/pull/202):
+  The inner loop of the DFA was heavily optimized to improve cache locality
+  and reduce the overall number of instructions run on each iteration. This
+  represents the first use of `unsafe` in `regex` (to elide bounds checks).
+* [PR #200](https://github.com/rust-lang/regex/pull/200):
+  Use of the `mempool` crate (which used thread local storage) was replaced
+  with a faster version of a similar API in @Amanieu's `thread_local` crate.
+  It should reduce contention when using a regex from multiple threads
+  simultaneously.
+* PCRE2 JIT benchmarks were added. A benchmark comparison can be found
+  [here](https://gist.github.com/anonymous/14683c01993e91689f7206a18675901b).
+  (Includes a comparison with PCRE1's JIT and Oniguruma.)
+* A bug where word boundaries weren't being matched correctly in the DFA was
+  fixed. This only affected use of `bytes::Regex`.
+* [#160](https://github.com/rust-lang/regex/issues/160):
+  `Captures` now has a `Debug` impl.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..0b2f0b9
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,120 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "regex"
+version = "1.3.7"
+authors = ["The Rust Project Developers"]
+exclude = ["/scripts/*", "/.github/*"]
+autotests = false
+description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
+homepage = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex"
+readme = "README.md"
+categories = ["text-processing"]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/regex"
+[profile.bench]
+debug = true
+
+[profile.release]
+debug = true
+
+[profile.test]
+debug = true
+
+[lib]
+doctest = false
+bench = false
+
+[[test]]
+name = "default"
+path = "tests/test_default.rs"
+
+[[test]]
+name = "default-bytes"
+path = "tests/test_default_bytes.rs"
+
+[[test]]
+name = "nfa"
+path = "tests/test_nfa.rs"
+
+[[test]]
+name = "nfa-utf8bytes"
+path = "tests/test_nfa_utf8bytes.rs"
+
+[[test]]
+name = "nfa-bytes"
+path = "tests/test_nfa_bytes.rs"
+
+[[test]]
+name = "backtrack"
+path = "tests/test_backtrack.rs"
+
+[[test]]
+name = "backtrack-utf8bytes"
+path = "tests/test_backtrack_utf8bytes.rs"
+
+[[test]]
+name = "backtrack-bytes"
+path = "tests/test_backtrack_bytes.rs"
+
+[[test]]
+name = "crates-regex"
+path = "tests/test_crates_regex.rs"
+[dependencies.aho-corasick]
+version = "0.7.6"
+optional = true
+
+[dependencies.memchr]
+version = "2.2.1"
+optional = true
+
+[dependencies.regex-syntax]
+version = "0.6.17"
+default-features = false
+
+[dependencies.thread_local]
+version = "1"
+optional = true
+[dev-dependencies.doc-comment]
+version = "0.3"
+
+[dev-dependencies.lazy_static]
+version = "1"
+
+[dev-dependencies.quickcheck]
+version = "0.8"
+default-features = false
+
+[dev-dependencies.rand]
+version = "0.6.5"
+
+[features]
+default = ["std", "perf", "unicode", "regex-syntax/default"]
+pattern = []
+perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
+perf-cache = ["thread_local"]
+perf-dfa = []
+perf-inline = []
+perf-literal = ["aho-corasick", "memchr"]
+std = []
+unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"]
+unicode-age = ["regex-syntax/unicode-age"]
+unicode-bool = ["regex-syntax/unicode-bool"]
+unicode-case = ["regex-syntax/unicode-case"]
+unicode-gencat = ["regex-syntax/unicode-gencat"]
+unicode-perl = ["regex-syntax/unicode-perl"]
+unicode-script = ["regex-syntax/unicode-script"]
+unicode-segment = ["regex-syntax/unicode-segment"]
+unstable = ["pattern"]
+use_std = ["std"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..1f7863f
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,192 @@
+[package]
+name = "regex"
+version = "1.3.7"  #:version
+authors = ["The Rust Project Developers"]
+license = "MIT OR Apache-2.0"
+readme = "README.md"
+repository = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex"
+homepage = "https://github.com/rust-lang/regex"
+description = """
+An implementation of regular expressions for Rust. This implementation uses
+finite automata and guarantees linear time matching on all inputs.
+"""
+categories = ["text-processing"]
+autotests = false
+exclude = ["/scripts/*", "/.github/*"]
+
+[workspace]
+members = [
+  "bench", "regex-capi", "regex-debug", "regex-syntax",
+]
+
+[lib]
+# There are no benchmarks in the library code itself
+bench = false
+# Doc tests fail when some features aren't present. The easiest way to work
+# around this is to disable automatic doc testing, but explicitly test them
+# with `cargo test --doc`.
+doctest = false
+
+# Features are documented in the "Crate features" section of the crate docs:
+# https://docs.rs/regex/*/#crate-features
+[features]
+default = ["std", "perf", "unicode", "regex-syntax/default"]
+
+# ECOSYSTEM FEATURES
+
+# The 'std' feature permits the regex crate to use the standard library. This
+# is intended to support future use cases where the regex crate may be able
+# to compile without std, and instead just rely on 'core' and 'alloc' (for
+# example). Currently, this isn't supported, and removing the 'std' feature
+# will prevent regex from compiling.
+std = []
+# The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until
+# then, it is an alias for the 'std' feature.
+use_std = ["std"]
+
+
+# PERFORMANCE FEATURES
+
+# Enables all performance features.
+perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
+# Enables fast caching. (If disabled, caching is still used, but is slower.)
+perf-cache = ["thread_local"]
+# Enables use of a lazy DFA when possible.
+perf-dfa = []
+# Enables aggressive use of inlining.
+perf-inline = []
+# Enables literal optimizations.
+perf-literal = ["aho-corasick", "memchr"]
+
+
+# UNICODE DATA FEATURES
+
+# Enables all Unicode features. This expands if new Unicode features are added.
+unicode = [
+  "unicode-age",
+  "unicode-bool",
+  "unicode-case",
+  "unicode-gencat",
+  "unicode-perl",
+  "unicode-script",
+  "unicode-segment",
+  "regex-syntax/unicode",
+]
+# Enables use of the `Age` property, e.g., `\p{Age:3.0}`.
+unicode-age = ["regex-syntax/unicode-age"]
+# Enables use of a smattering of boolean properties, e.g., `\p{Emoji}`.
+unicode-bool = ["regex-syntax/unicode-bool"]
+# Enables Unicode-aware case insensitive matching, e.g., `(?i)β`.
+unicode-case = ["regex-syntax/unicode-case"]
+# Enables Unicode general categories, e.g., `\p{Letter}` or `\pL`.
+unicode-gencat = ["regex-syntax/unicode-gencat"]
+# Enables Unicode-aware Perl classes corresponding to `\w`, `\s` and `\d`.
+unicode-perl = ["regex-syntax/unicode-perl"]
+# Enables Unicode scripts and script extensions, e.g., `\p{Greek}`.
+unicode-script = ["regex-syntax/unicode-script"]
+# Enables Unicode segmentation properties, e.g., `\p{gcb=Extend}`.
+unicode-segment = ["regex-syntax/unicode-segment"]
+
+
+# UNSTABLE FEATURES (requires Rust nightly)
+
+# A blanket feature that governs whether unstable features are enabled or not.
+# Unstable features are disabled by default, and typically rely on unstable
+# features in rustc itself.
+unstable = ["pattern"]
+
+# Enable to use the unstable pattern traits defined in std. This is enabled
+# by default if the unstable feature is enabled.
+pattern = []
+
+# For very fast prefix literal matching.
+[dependencies.aho-corasick]
+version = "0.7.6"
+optional = true
+
+# For skipping along search text quickly when a leading byte is known.
+[dependencies.memchr]
+version = "2.2.1"
+optional = true
+
+# For managing regex caches quickly across multiple threads.
+[dependencies.thread_local]
+version = "1"
+optional = true
+
+# For parsing regular expressions.
+[dependencies.regex-syntax]
+path = "regex-syntax"
+version = "0.6.17"
+default-features = false
+
+[dev-dependencies]
+# For examples.
+lazy_static = "1"
+# For property based tests.
+quickcheck = { version = "0.8", default-features = false }
+# For generating random test data.
+rand = "0.6.5"
+# To check README's example
+doc-comment = "0.3"
+
+# Run the test suite on the default behavior of Regex::new.
+# This includes a mish mash of NFAs and DFAs, which are chosen automatically
+# based on the regex. We test both of the NFA implementations by forcing their
+# usage with the test definitions below. (We can't test the DFA implementations
+# in the same way since they can't be used for every regex tested.)
+[[test]]
+path = "tests/test_default.rs"
+name = "default"
+
+# The same as the default tests, but run on bytes::Regex.
+[[test]]
+path = "tests/test_default_bytes.rs"
+name = "default-bytes"
+
+# Run the test suite on the NFA algorithm over Unicode codepoints.
+[[test]]
+path = "tests/test_nfa.rs"
+name = "nfa"
+
+# Run the test suite on the NFA algorithm over bytes that match UTF-8 only.
+[[test]]
+path = "tests/test_nfa_utf8bytes.rs"
+name = "nfa-utf8bytes"
+
+# Run the test suite on the NFA algorithm over arbitrary bytes.
+[[test]]
+path = "tests/test_nfa_bytes.rs"
+name = "nfa-bytes"
+
+# Run the test suite on the backtracking engine over Unicode codepoints.
+[[test]]
+path = "tests/test_backtrack.rs"
+name = "backtrack"
+
+# Run the test suite on the backtracking engine over bytes that match UTF-8
+# only.
+[[test]]
+path = "tests/test_backtrack_utf8bytes.rs"
+name = "backtrack-utf8bytes"
+
+# Run the test suite on the backtracking engine over arbitrary bytes.
+[[test]]
+path = "tests/test_backtrack_bytes.rs"
+name = "backtrack-bytes"
+
+# Run all backends against each regex found on crates.io and make sure
+# that they all do the same thing.
+[[test]]
+path = "tests/test_crates_regex.rs"
+name = "crates-regex"
+
+[profile.release]
+debug = true
+
+[profile.bench]
+debug = true
+
+[profile.test]
+debug = true
diff --git a/HACKING.md b/HACKING.md
new file mode 100644
index 0000000..34af5b5
--- /dev/null
+++ b/HACKING.md
@@ -0,0 +1,341 @@
+Your friendly guide to hacking and navigating the regex library.
+
+This guide assumes familiarity with Rust and Cargo, and at least a perusal of
+the user facing documentation for this crate.
+
+If you're looking for background on the implementation in this library, then
+you can do no better than Russ Cox's article series on implementing regular
+expressions using finite automata: https://swtch.com/~rsc/regexp/
+
+
+## Architecture overview
+
+As you probably already know, this library executes regular expressions using
+finite automata. In particular, a design goal is to make searching linear
+with respect to both the regular expression and the text being searched.
+Meeting that design goal on its own is not so hard and can be done with an
+implementation of the Pike VM (similar to Thompson's construction, but supports
+capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html
+--- This library contains such an implementation in src/pikevm.rs.
+
+Making it fast is harder. One of the key problems with the Pike VM is that it
+can be in more than one state at any point in time, and must shuffle capture
+positions between them. The Pike VM also spends a lot of time following the
+same epsilon transitions over and over again. We can employ one trick to
+speed up the Pike VM: extract one or more literal prefixes from the regular
+expression and execute specialized code to quickly find matches of those
+prefixes in the search text. The Pike VM can then be avoided for most of the
+search, and instead only executed when a prefix is found. The code to find
+prefixes is in the regex-syntax crate (in this repository). The code to search
+for literals is in src/literals.rs. When more than one literal prefix is found,
+we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one
+literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and
+Boyer-Moore use `memchr` when appropriate. The Boyer-Moore variant in this
+library also uses elementary frequency analysis to choose the right byte to run
+`memchr` with.
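+
+As a rough illustration of the prefix trick (a deliberately simplified sketch;
+the real literal machinery in src/literals.rs and the aho-corasick crate is
+far more involved), the idea is to let a fast byte search pick candidate
+positions and only then run a full verification step:
+
+    // Sketch only: `verify` stands in for running a real matching engine
+    // anchored at the candidate position.
+    fn find_with_prefix(
+        haystack: &[u8],
+        prefix: u8,
+        verify: impl Fn(usize) -> Option<(usize, usize)>,
+    ) -> Option<(usize, usize)> {
+        let mut at = 0;
+        while let Some(i) = memchr::memchr(prefix, &haystack[at..]) {
+            let candidate = at + i;
+            if let Some(m) = verify(candidate) {
+                return Some(m);
+            }
+            at = candidate + 1;
+        }
+        None
+    }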
+
+Of course, detecting prefix literals can only take us so far. Not all regular
+expressions have literal prefixes. To remedy this, we try another approach
+to executing the Pike VM: backtracking, whose implementation can be found in
+src/backtrack.rs. One reason why backtracking can be faster is that it avoids
+excessive shuffling of capture groups. Of course, backtracking is susceptible
+to exponential runtimes, so we keep track of every state we've visited to make
+sure we never visit it again. This guarantees linear time execution, but we
+pay for it with the memory required to track visited states. Because of the
+memory requirement, we only use this engine on small search strings *and* small
+regular expressions.
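+
+The bookkeeping that guarantees linear time is conceptually just a table over
+(instruction, input position) pairs. A minimal sketch (the real table in
+src/backtrack.rs packs bits more tightly):
+
+    /// Each (instruction, position) pair is explored at most once, bounding
+    /// total work by `instructions * (input length + 1)`.
+    struct Visited {
+        seen: Vec<bool>,
+        insts: usize,
+    }
+
+    impl Visited {
+        fn new(insts: usize, input_len: usize) -> Visited {
+            Visited { seen: vec![false; insts * (input_len + 1)], insts }
+        }
+
+        /// Returns true exactly once per (inst, pos) pair.
+        fn first_visit(&mut self, inst: usize, pos: usize) -> bool {
+            let idx = pos * self.insts + inst;
+            let first = !self.seen[idx];
+            self.seen[idx] = true;
+            first
+        }
+    }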
+
+Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs.
+It is distinct from the Pike VM in that the DFA is explicitly represented in
+memory and is only ever in one state at a time. It is said to be "lazy" because
+the DFA is computed as text is searched, where each byte in the search text
+results in at most one new DFA state. It is made fast by caching states. DFAs
+are susceptible to exponential state blow up (where the worst case is computing
+a new state for every input byte, regardless of what's in the state cache). To
+avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache
+is full, it is wiped and state computation starts over again. If the cache is
+wiped too frequently, then the DFA gives up and searching falls back to one of
+the aforementioned algorithms.
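+
+The cache behavior can be pictured as a map from state identity to compiled
+state that is simply wiped when full (a hedged sketch; the actual cache in
+src/dfa.rs tracks its size in bytes and has more subtle give-up logic):
+
+    use std::collections::HashMap;
+    use std::hash::Hash;
+
+    // Sketch only: `K` identifies a DFA state (e.g., a set of NFA states)
+    // and `S` is the compiled state with its transitions.
+    struct BoundedCache<K, S> {
+        states: HashMap<K, S>,
+        max_states: usize,
+    }
+
+    impl<K: Hash + Eq, S> BoundedCache<K, S> {
+        fn get_or_compute(&mut self, key: K, compute: impl FnOnce() -> S) -> &S {
+            if self.states.len() >= self.max_states
+                && !self.states.contains_key(&key)
+            {
+                // Cache is full: wipe it and let state construction start over.
+                self.states.clear();
+            }
+            self.states.entry(key).or_insert_with(compute)
+        }
+    }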
+
+All of the above matching engines expose precisely the same matching semantics.
+This is indeed tested. (See the section below about testing.)
+
+The following sub-sections describe the rest of the library and how each of the
+matching engines are actually used.
+
+### Parsing
+
+Regular expressions are parsed using the regex-syntax crate, which is
+maintained in this repository. The regex-syntax crate defines an abstract
+syntax and provides very detailed error messages when a parse error is
+encountered. Parsing is done in a separate crate so that others may benefit
+from its existence, and because it is relatively divorced from the rest of the
+regex library.
+
+The regex-syntax crate also provides sophisticated support for extracting
+prefix and suffix literals from regular expressions.
+
+### Compilation
+
+The compiler is in src/compile.rs. The input to the compiler is some abstract
+syntax for a regular expression and the output is a sequence of opcodes that
+matching engines use to execute a search. (One can think of matching engines as
+mini virtual machines.) The sequence of opcodes is a particular encoding of a
+non-deterministic finite automaton. In particular, the opcodes explicitly rely
+on epsilon transitions.
+
+Consider a simple regular expression like `a|b`. Its compiled form looks like
+this:
+
+    000 Save(0)
+    001 Split(2, 3)
+    002 'a' (goto: 4)
+    003 'b'
+    004 Save(1)
+    005 Match
+
+The first column is the instruction pointer and the second column is the
+instruction. Save instructions indicate that the current position in the input
+should be stored in a captured location. Split instructions represent a binary
+branch in the program (i.e., epsilon transitions). The instructions `'a'` and
+`'b'` indicate that the literal bytes `'a'` or `'b'` should match.
+
+In older versions of this library, the compilation looked like this:
+
+    000 Save(0)
+    001 Split(2, 3)
+    002 'a'
+    003 Jump(5)
+    004 'b'
+    005 Save(1)
+    006 Match
+
+In particular, empty instructions that merely served to move execution from one
+point in the program to another were removed. Instead, every instruction has a
+`goto` pointer embedded into it. This resulted in a small performance boost for
+the Pike VM, because it was one fewer epsilon transition that it had to follow.
+
+There exist more instructions and they are defined and documented in
+src/prog.rs.
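+
+In Rust terms, the opcodes sketched above look roughly like the following (a
+simplified rendering; the real instruction type in src/prog.rs has more
+variants, e.g. for ranges, byte classes and empty-width assertions, and
+carries additional data):
+
+    type InstPtr = usize;
+
+    // Simplified sketch of the opcode shapes; `goto` is the embedded pointer
+    // mentioned above, which removes the need for a separate Jump.
+    enum Inst {
+        /// Record the current input position in the given capture slot.
+        Save { slot: usize, goto: InstPtr },
+        /// Epsilon transition: try `goto1` first, then `goto2`.
+        Split { goto1: InstPtr, goto2: InstPtr },
+        /// Match a single character, then continue at `goto`.
+        Char { c: char, goto: InstPtr },
+        /// A match has been found.
+        Match,
+    }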
+
+Compilation has several knobs and a few unfortunately complicated invariants.
+Namely, the output of compilation can be one of two types of programs: a
+program that executes on Unicode scalar values or a program that executes
+on raw bytes. In the former case, the matching engine is responsible for
+performing UTF-8 decoding and executing instructions using Unicode codepoints.
+In the latter case, the program handles UTF-8 decoding implicitly, so that the
+matching engine can execute on raw bytes. All matching engines can execute
+either Unicode or byte based programs except for the lazy DFA, which requires
+byte based programs. In general, both representations were kept because (1) the
+lazy DFA requires byte based programs so that states can be encoded in a memory
+efficient manner and (2) the Pike VM benefits greatly from inlining Unicode
+character classes into fewer instructions as it results in fewer epsilon
+transitions.
+
+N.B. UTF-8 decoding is built into the compiled program by making use of the
+utf8-ranges crate. The compiler in this library factors out common suffixes to
+reduce the size of huge character classes (e.g., `\pL`).
+
+A regrettable consequence of this split in instruction sets is that we
+generally need to compile two programs: one for NFA execution and one for the
+lazy DFA.
+
+In fact, it is worse than that: the lazy DFA is not capable of finding the
+starting location of a match in a single scan, and must instead execute a
+backwards search after finding the end location. To execute a backwards search,
+we must have compiled the regular expression *in reverse*.
+
+This means that every compilation of a regular expression generally results in
+three distinct programs. It would be possible to lazily compile the Unicode
+program, since it is never needed if (1) the regular expression uses no word
+boundary assertions and (2) the caller never asks for sub-capture locations.
+
+### Execution
+
+At the time of writing, there are four matching engines in this library:
+
+1. The Pike VM (supports captures).
+2. Bounded backtracking (supports captures).
+3. Literal substring or multi-substring search.
+4. Lazy DFA (no support for Unicode word boundary assertions).
+
+Only the first two matching engines are capable of executing every regular
+expression program. They also happen to be the slowest, which means we need
+some logic that (1) knows various facts about the regular expression and (2)
+knows what the caller wants. Using this information, we can determine which
+engine (or engines) to use.
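+
+Schematically, the decision looks something like the following sketch (with
+made-up names and an invented size threshold; the real logic, described next,
+weighs many more factors):
+
+    enum MatchEngine { Literals, LazyDfa, Backtrack, PikeVm }
+
+    fn choose_engine(
+        is_pure_literal: bool,
+        needs_unicode_word_boundary: bool,
+        program_len: usize,
+        input_len: usize,
+    ) -> MatchEngine {
+        if is_pure_literal {
+            MatchEngine::Literals
+        } else if !needs_unicode_word_boundary {
+            MatchEngine::LazyDfa
+        } else if program_len * input_len <= 250_000 {
+            // Small enough for the bounded backtracker's visited table.
+            MatchEngine::Backtrack
+        } else {
+            MatchEngine::PikeVm
+        }
+    }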
+
+The logic for choosing which engine to execute is in src/exec.rs and is
+documented on the Exec type. Exec values contain regular expression Programs
+(defined in src/prog.rs), which contain all the necessary tidbits for actually
+executing a regular expression on search text.
+
+For the most part, the execution logic is straightforward and follows the
+limitations of each engine described above pretty faithfully. The hairiest
+part of src/exec.rs by far is the execution of the lazy DFA, since it requires
+a forwards and backwards search, and then falls back to either the Pike VM or
+backtracking if the caller requested capture locations.
+
+The Exec type also contains mutable scratch space for each type of matching
+engine. This scratch space is used during search (for example, for the lazy
+DFA, it contains compiled states that are reused on subsequent searches).
+
+### Programs
+
+A regular expression program is essentially a sequence of opcodes produced by
+the compiler plus various facts about the regular expression (such as whether
+it is anchored, its capture names, etc.).
+
+### The regex! macro
+
+The `regex!` macro no longer exists. It was developed in a bygone era as a
+compiler plugin during the infancy of the regex crate. Back then, the only
+matching engine in the crate was the Pike VM. The `regex!` macro was, itself,
+also a Pike VM. The only advantages it offered over the dynamic Pike VM that
+was built at runtime were the following:
+
+  1. Syntax checking was done at compile time. Your Rust program wouldn't
+     compile if your regex didn't compile.
+  2. Reduction of overhead that was proportional to the size of the regex.
+     For the most part, this overhead consisted of heap allocation, which
+     was nearly eliminated in the compiler plugin.
+
+The main takeaway here is that the compiler plugin was a marginally faster
+version of a slow regex engine. As the regex crate evolved, it grew other regex
+engines (DFA, bounded backtracker) and sophisticated literal optimizations.
+The regex macro didn't keep pace, and it therefore became (dramatically) slower
+than the dynamic engines. The only reason left to use it was for the compile
+time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint
+tool) has a lint that checks the validity of your regular expressions, which
+mostly replaces that use case.
+
+Additionally, the regex compiler plugin stopped receiving maintenance. Nobody
+complained. At that point, it seemed prudent to just remove it.
+
+Will a compiler plugin be brought back? The future is murky, but there is
+definitely an opportunity there to build something that is faster than the
+dynamic engines in some cases. But it will be challenging! As of now, there
+are no plans to work on this.
+
+
+## Testing
+
+A key aspect of any mature regex library is its test suite. A subset of the
+tests in this library come from Glenn Fowler's AT&T test suite (its online
+presence seems gone at the time of writing). The source of the test suite is
+located in src/testdata. The scripts/regex-match-tests.py takes the test suite
+in src/testdata and generates tests/matches.rs.
+
+There are also many other manually crafted tests and regression tests in
+tests/tests.rs. Some of these tests were taken from RE2.
+
+The biggest source of complexity in the tests is related to answering this
+question: how can we reuse the tests to check all of our matching engines? One
+approach would have been to encode every test into some kind of format (like
+the AT&T test suite) and code generate tests for each matching engine. The
+approach we use in this library is to create a Cargo.toml entry point for each
+matching engine we want to test. The entry points are:
+
+* `tests/test_default.rs` - tests `Regex::new`
+* `tests/test_default_bytes.rs` - tests `bytes::Regex::new`
+* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA
+  algorithm on every regex.
+* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA
+  algorithm on every regex and use *arbitrary* byte based programs.
+* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA
+  algorithm on every regex and use *UTF-8* byte based programs.
+* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use
+  backtracking on every regex.
+* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use
+  backtracking on every regex and use *arbitrary* byte based programs.
+* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use
+  backtracking on every regex and use *UTF-8* byte based programs.
+* `tests/test_crates_regex.rs` - tests to make sure that all of the
+  backends behave in the same way against a number of quickcheck
+  generated random inputs. These tests need to be enabled through
+  the `RUST_REGEX_RANDOM_TEST` environment variable (see
+  below).
+
+The lazy DFA and pure literal engines are absent from this list because
+they cannot be used on every regular expression. Instead, we rely on
+`tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible.
+
+Since the tests are repeated several times, and because `cargo test` runs all
+entry points, it can take a while to compile everything. To reduce compile
+times slightly, try using `cargo test --test default`, which will only use the
+`tests/test_default.rs` entry point.
+
+The random testing takes quite a while, so it is not enabled by default.
+In order to run the random testing you can set the
+`RUST_REGEX_RANDOM_TEST` environment variable to anything before
+invoking `cargo test`. Note that this variable is inspected at compile
+time, so if the tests don't seem to be running, you may need to run
+`cargo clean`.
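+
+For example, one way to run the random tests from a clean state (the value of
+the variable doesn't matter, only its presence):
+
+    $ cargo clean
+    $ RUST_REGEX_RANDOM_TEST=1 cargo test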
+
+## Benchmarking
+
+The benchmarking in this crate is made up of many micro-benchmarks. Currently,
+there are two primary sets of benchmarks: the benchmarks that were adopted
+at this library's inception (in `bench/src/misc.rs`) and a newer set of
+benchmarks meant to test various optimizations. Specifically, the latter set
+contains some analysis and is in `bench/src/sherlock.rs`. Also, the latter set
+is executed on the same lengthy input, whereas the former benchmarks are
+executed on strings of varying length.
+
+There is also a smattering of benchmarks for parsing and compilation.
+
+Benchmarks are in a separate crate so that its dependencies can be managed
+separately from the main regex crate.
+
+Benchmarking follows a similarly wonky setup as tests. There are multiple entry
+points:
+
+* `bench_rust.rs` - benchmarks `Regex::new`
+* `bench_rust_bytes.rs` - benchmarks `bytes::Regex::new`
+* `bench_pcre.rs` - benchmarks PCRE
+* `bench_onig.rs` - benchmarks Oniguruma
+
+The PCRE and Oniguruma benchmarks exist as a comparison point to a mature
+regular expression library. In general, this regex library compares favorably
+(there are even a few benchmarks that PCRE simply runs too slowly on or
+outright can't execute at all). I would love to add other regular expression
+library benchmarks (especially RE2).
+
+If you're hacking on one of the matching engines and just want to see
+benchmarks, then all you need to run is:
+
+    $ (cd bench && ./run rust)
+
+If you want to compare your results with older benchmarks, then try:
+
+    $ (cd bench && ./run rust | tee old)
+    $ ... make it faster
+    $ (cd bench && ./run rust | tee new)
+    $ cargo benchcmp old new --improvements
+
+The `cargo-benchcmp` utility is available here:
+https://github.com/BurntSushi/cargo-benchcmp
+
+The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See
+`./bench/bench --help`.
+
+## Dev Docs
+
+When digging your teeth into the codebase for the first time, the
+crate documentation can be a great resource. By default `rustdoc`
+will strip out all documentation of private crate members in an
+effort to help consumers of the crate focus on the *interface*
+without having to concern themselves with the *implementation*.
+Normally this is a great thing, but if you want to start hacking
+on regex internals it is not what you want. Many of the private members
+of this crate are well documented with rustdoc style comments, and
+it would be a shame to miss out on the opportunity that presents.
+You can generate the private docs with:
+
+```
+$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments
+```
+
+Then just point your browser at `target/doc/regex/index.html`.
+
+See https://github.com/rust-lang/rust/issues/15347 for more info
+about generating developer docs for internal use.
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE
\ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..39d4bdb
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..53b95fe
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "regex"
+description: "A Rust library for parsing, compiling, and executing regular expressions. Its syntax is similar to Perl-style regular expressions, but lacks a few features like look around and backreferences. In exchange, all searches execute in linear time with respect to the size of the regular expression and search text. Much of the syntax and implementation is inspired by RE2."
+third_party {
+  url {
+    type: HOMEPAGE
+    value: "https://crates.io/crates/regex"
+  }
+  url {
+    type: GIT
+    value: "https://github.com/rust-lang/regex"
+  }
+  version: "1.3.7"
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2020
+    month: 4
+    day: 17
+  }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..46fc303
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:/OWNERS
diff --git a/PERFORMANCE.md b/PERFORMANCE.md
new file mode 100644
index 0000000..b4aeb89
--- /dev/null
+++ b/PERFORMANCE.md
@@ -0,0 +1,279 @@
+Your friendly guide to understanding the performance characteristics of this
+crate.
+
+This guide assumes some familiarity with the public API of this crate, which
+can be found here: https://docs.rs/regex
+
+## Theory vs. Practice
+
+One of the design goals of this crate is to provide worst case linear time
+behavior with respect to the text searched using finite state automata. This
+means that, *in theory*, the performance of this crate is much better than most
+regex implementations, which typically use backtracking which has worst case
+exponential time.
+
+For example, try opening a Python interpreter and typing this:
+
+    >>> import re
+    >>> re.search('(a*)*c', 'a' * 30).span()
+
+I'll wait.
+
+At some point, you'll figure out that it won't terminate any time soon. ^C it.
+
+The promise of this crate is that *this pathological behavior can't happen*.
+
+With that said, just because we have protected ourselves against worst case
+exponential behavior doesn't mean we are immune from large constant factors
+or places where the current regex engine isn't quite optimal. This guide will
+detail those cases and provide guidance on how to avoid them, among other
+bits of general advice.
+
+## Thou Shalt Not Compile Regular Expressions In A Loop
+
+**Advice**: Use `lazy_static` to amortize the cost of `Regex` compilation.
+
+Don't do it unless you really don't mind paying for it. Compiling a regular
+expression in this crate is quite expensive. It is conceivable that it may get
+faster some day, but I wouldn't hold out hope for, say, an order of magnitude
+improvement. In particular, compilation can take anywhere from a few dozen
+microseconds to a few dozen milliseconds. Yes, milliseconds. Unicode character
+classes, in particular, have the largest impact on compilation performance. At
+the time of writing, for example, `\pL{100}` takes around 44ms to compile. This
+is because `\pL` corresponds to every letter in Unicode and compilation must
+turn it into a proper automaton that decodes a subset of UTF-8 which
+corresponds to those letters. Compilation also spends some cycles shrinking the
+size of the automaton.
+
+This means that in order to realize efficient regex matching, one must
+*amortize the cost of compilation*. Trivially, if a call to `is_match` is
+inside a loop, then make sure your call to `Regex::new` is *outside* that loop.
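+
+As a minimal sketch (the pattern and helper function are made up for
+illustration), hoist the compilation out of the loop and reuse the compiled
+`Regex`:
+
+    use regex::Regex;
+
+    fn count_dates(lines: &[&str]) -> usize {
+        // Compile once, before the loop, not once per iteration.
+        let re = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
+        let mut count = 0;
+        for line in lines {
+            if re.is_match(line) {
+                count += 1;
+            }
+        }
+        count
+    }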
+
+In many programming languages, regular expressions can be conveniently defined
+and compiled in a global scope, and code can reach out and use them as if
+they were global static variables. In Rust, there is really no concept of
+life-before-main, and therefore, one cannot utter this:
+
+    static MY_REGEX: Regex = Regex::new("...").unwrap();
+
+Unfortunately, this would seem to imply that one must pass `Regex` objects
+around to everywhere they are used, which can be especially painful depending
+on how your program is structured. Thankfully, the
+[`lazy_static`](https://crates.io/crates/lazy_static)
+crate provides an answer that works well:
+
+    #[macro_use] extern crate lazy_static;
+    extern crate regex;
+
+    use regex::Regex;
+
+    fn some_helper_function(text: &str) -> bool {
+        lazy_static! {
+            static ref MY_REGEX: Regex = Regex::new("...").unwrap();
+        }
+        MY_REGEX.is_match(text)
+    }
+
+In other words, the `lazy_static!` macro enables us to define a `Regex` *as if*
+it were a global static value. What is actually happening under the covers is
+that the code inside the macro (i.e., `Regex::new(...)`) is run on *first use*
+of `MY_REGEX` via a `Deref` impl. The implementation is admittedly magical, but
+it's self contained and everything works exactly as you expect. In particular,
+`MY_REGEX` can be used from multiple threads without wrapping it in an `Arc` or
+a `Mutex`. On that note...
+
+## Using a regex from multiple threads
+
+**Advice**: The performance impact from using a `Regex` from multiple threads
+is likely negligible. If necessary, clone the `Regex` so that each thread gets
+its own copy. Cloning a regex does not incur any more memory overhead than
+using a single `Regex` from multiple threads simultaneously would.
+*Its only cost is ergonomics.*
+
+It is supported and encouraged to define your regexes using `lazy_static!` as
+if they were global static values, and then use them to search text from
+multiple threads simultaneously.
+
+One might imagine that this is possible because a `Regex` represents a
+*compiled* program, so that any allocation or mutation is already done, and is
+therefore read-only. Unfortunately, this is not true. Each type of search
+strategy in this crate requires some kind of mutable scratch space to use
+*during search*. For example, when executing a DFA, its states are computed
+lazily and reused on subsequent searches. Those states go into that mutable
+scratch space.
+
+The mutable scratch space is an implementation detail, and in general, its
+mutation should not be observable from users of this crate. Therefore, it uses
+interior mutability. This implies that a `Regex` must either be used from only
+one thread, or it must do some sort of synchronization. Either choice is
+reasonable, but this crate chooses the latter, in particular because it is
+ergonomic and makes usage with `lazy_static!` straightforward.
+
+Synchronization implies *some* amount of overhead. When a `Regex` is used from
+a single thread, this overhead is negligible. When a `Regex` is used from
+multiple threads simultaneously, it is possible for the overhead of
+synchronization from contention to impact performance. Specifically,
+contention may happen if you are calling any of these methods repeatedly from
+multiple threads simultaneously:
+
+* shortest_match
+* is_match
+* find
+* captures
+
+In particular, every invocation of one of these methods must synchronize with
+other threads to retrieve its mutable scratch space before searching can start.
+If, however, you are using one of these methods:
+
+* find_iter
+* captures_iter
+
+Then you may not suffer from contention since the cost of synchronization is
+amortized on *construction of the iterator*. That is, the mutable scratch space
+is obtained when the iterator is created and retained throughout its lifetime.
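+
+For instance, a minimal sketch of handing each thread its own clone (the
+pattern and thread count here are arbitrary):
+
+    use regex::Regex;
+    use std::thread;
+
+    fn main() {
+        let re = Regex::new(r"\w+").unwrap();
+        let handles: Vec<_> = (0..4)
+            .map(|_| {
+                // Clone the Regex so each thread gets its own copy, as
+                // suggested above.
+                let re = re.clone();
+                thread::spawn(move || re.is_match("hello world"))
+            })
+            .collect();
+        for handle in handles {
+            assert!(handle.join().unwrap());
+        }
+    }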
+
+## Only ask for what you need
+
+**Advice**: Prefer in this order: `is_match`, `find`, `captures`.
+
+There are three primary search methods on a `Regex`:
+
+* is_match
+* find
+* captures
+
+In general, these are ordered from fastest to slowest.
+
+`is_match` is fastest because it doesn't actually need to find the start or the
+end of the leftmost-first match. It can quit immediately after it knows there
+is a match. For example, given the regex `a+` and the haystack `aaaaa`, the
+search will quit after examining the first byte.
+
+In contrast, `find` must return both the start and end location of the
+leftmost-first match. It can use the DFA matcher for this, but must run it
+forwards once to find the end of the match *and then run it backwards* to find
+the start of the match. The two scans and the cost of finding the real end of
+the leftmost-first match make this more expensive than `is_match`.
+
+`captures` is the most expensive of them all because it must do what `find`
+does, and then run either the bounded backtracker or the Pike VM to fill in the
+capture group locations. Both of these are simulations of an NFA, which must
+spend a lot of time shuffling states around. The DFA limits the performance hit
+somewhat by restricting the amount of text that must be searched via an NFA
+simulation.
+
+One other method not mentioned is `shortest_match`. This method has precisely
+the same performance characteristics as `is_match`, except it will return the
+end location at which it discovered a match. For example, given the regex `a+`
+and the haystack `aaaaa`, `shortest_match` may return `1` as opposed to `5`,
+the latter being the correct end location of the leftmost-first match.
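+
+To make the trade-off concrete, here is a small sketch using the `a+` and
+`aaaaa` example from above:
+
+    use regex::Regex;
+
+    fn main() {
+        let re = Regex::new(r"a+").unwrap();
+        let hay = "aaaaa";
+
+        // Cheapest: only needs to prove that some match exists.
+        assert!(re.is_match(hay));
+        // More work: must report the start and end of the leftmost-first match.
+        let m = re.find(hay).unwrap();
+        assert_eq!((m.start(), m.end()), (0, 5));
+        // Most expensive: also fills in capture group locations.
+        let caps = re.captures(hay).unwrap();
+        assert_eq!(&caps[0], hay);
+        // Like `is_match`, but reports the end position at which a match was
+        // found, which may be earlier than the true end (e.g., 1 instead of 5).
+        let _ = re.shortest_match(hay);
+    }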
+
+## Literals in your regex may make it faster
+
+**Advice**: Literals can reduce the work that the regex engine needs to do. Use
+them if you can, especially as prefixes.
+
+In particular, if your regex starts with a prefix literal, the prefix is
+quickly searched before entering the (much slower) regex engine. For example,
+given the regex `foo\w+`, the literal `foo` will be searched for using
+Boyer-Moore. If there's no match, then no regex engine is ever used. Only when
+there's a match is the regex engine invoked at the location of the match, which
+effectively permits the regex engine to skip large portions of a haystack.
+If a regex consists entirely of literals (possibly more than one), then
+it's possible that the regex engine can be avoided entirely even when there's a
+match.
+
+When one literal is found, Boyer-Moore is used. When multiple literals are
+found, then an optimized version of Aho-Corasick is used.
+
+This crate extends this optimization quite a bit. Here are
+a few examples of regexes that get literal prefixes detected:
+
+* `(foo|bar)` detects `foo` and `bar`
+* `(a|b)c` detects `ac` and `bc`
+* `[ab]foo[yz]` detects `afooy`, `afooz`, `bfooy` and `bfooz`
+* `a?b` detects `a` and `b`
+* `a*b` detects `a` and `b`
+* `(ab){3,6}` detects `ababab`
+
+Literals in anchored regexes can also be used for detecting non-matches very
+quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a non-match
+just by examining the first (or last) three bytes of the haystack.
+
+## Unicode word boundaries may prevent the DFA from being used
+
+**Advice**: In most cases, `\b` should work well. If not, use `(?-u:\b)`
+instead of `\b` if you care about consistent performance more than correctness.
+
+It's a sad state of the current implementation. At the moment, the DFA will try
+to interpret Unicode word boundaries as if they were ASCII word boundaries.
+If the DFA comes across any non-ASCII byte, it will quit and fall back to an
+alternative matching engine that can handle Unicode word boundaries correctly.
+The alternate matching engine is generally quite a bit slower (perhaps by an
+order of magnitude). If necessary, this can be ameliorated in two ways.
+
+The first way is to add some number of literal prefixes to your regular
+expression. Even though the DFA may not be used, specialized routines will
+still kick in to find prefix literals quickly, which limits how much work the
+NFA simulation will need to do.
+
+The second way is to give up on Unicode and use an ASCII word boundary instead.
+One can use an ASCII word boundary by disabling Unicode support. That is,
+instead of using `\b`, use `(?-u:\b)`.  Namely, given the regex `\b.+\b`, it
+can be transformed into a regex that uses the DFA with `(?-u:\b).+(?-u:\b)`. It
+is important to limit the scope of disabling the `u` flag, since it might lead
+to a syntax error if the regex could match arbitrary bytes. For example, if one
+wrote `(?-u)\b.+\b`, then a syntax error would be returned because `.` matches
+any *byte* when the Unicode flag is disabled.
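+
+A sketch of that transformation, using the `\b.+\b` example:
+
+    use regex::Regex;
+
+    fn main() {
+        // Unicode word boundaries: correct, but may fall off the DFA.
+        let _unicode = Regex::new(r"\b.+\b").unwrap();
+        // Scope the removal of the `u` flag to the word boundaries themselves.
+        let _ascii = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
+        // Disabling Unicode for the whole pattern is a syntax error here,
+        // because `.` would then be allowed to match arbitrary bytes.
+        assert!(Regex::new(r"(?-u)\b.+\b").is_err());
+    }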
+
+The second way isn't appreciably different from just using a Unicode word
+boundary in the first place, since the DFA will speculatively interpret it as
+an ASCII word boundary anyway. The key difference is that if an ASCII word
+boundary is used explicitly, then the DFA won't quit in the presence of
+non-ASCII UTF-8 bytes. This results in giving up correctness in exchange for
+more consistent performance.
+
+N.B. When using `bytes::Regex`, Unicode support is disabled by default, so one
+can simply write `\b` to get an ASCII word boundary.
+
+## Excessive counting can lead to exponential state blow up in the DFA
+
+**Advice**: Don't write regexes that cause DFA state blow up if you care about
+match performance.
+
+Wait, didn't I say that this crate guards against exponential worst cases?
+Well, it turns out that the process of converting an NFA to a DFA can lead to
+an exponential blow up in the number of states. This crate specifically guards
+against exponential blow up by doing two things:
+
+1. The DFA is computed lazily. That is, a state in the DFA only exists in
+   memory if it is visited. In particular, the lazy DFA guarantees that *at
+   most* one state is created for every byte of input. This, on its own,
+   guarantees linear time complexity.
+2. Of course, creating a new state for *every* byte of input means that search
+   will go incredibly slow because of very large constant factors. On top of
+   that, creating a state for every byte in a large haystack could result in
+   exorbitant memory usage. To ameliorate this, the DFA bounds the number of
+   states it can store. Once it reaches its limit, it flushes its cache. This
+   prevents reuse of states that it already computed. If the cache is flushed
+   too frequently, then the DFA will give up and execution will fall back to
+   one of the NFA simulations.
+
+In effect, this crate will detect exponential state blow up and fall back to
+a search routine with fixed memory requirements. This does, however, mean that
+searching will be much slower than one might expect. Regexes that rely on
+counting in particular are strong aggravators of this behavior. For example,
+matching `[01]*1[01]{20}$` against a random sequence of `0`s and `1`s.
+
+In the future, it may be possible to increase the bound that the DFA uses,
+which would allow the caller to choose how much memory they're willing to
+spend.
+
+## Resist the temptation to "optimize" regexes
+
+**Advice**: This ain't a backtracking engine.
+
+An entire book was written on how to optimize Perl-style regular expressions.
+Most of those techniques are not applicable for this library. For example,
+there is no problem with using non-greedy matching or having lots of
+alternations in your regex.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8c05a90
--- /dev/null
+++ b/README.md
@@ -0,0 +1,256 @@
+regex
+=====
+A Rust library for parsing, compiling, and executing regular expressions. Its
+syntax is similar to Perl-style regular expressions, but lacks a few features
+like look around and backreferences. In exchange, all searches execute in
+linear time with respect to the size of the regular expression and search text.
+Much of the syntax and implementation is inspired
+by [RE2](https://github.com/google/re2).
+
+[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
+[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
+[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
+
+### Documentation
+
+[Module documentation with examples](https://docs.rs/regex).
+The module documentation also includes a comprehensive description of the
+syntax supported.
+
+Documentation with examples for the various matching functions and iterators
+can be found on the
+[`Regex` type](https://docs.rs/regex/*/regex/struct.Regex.html).
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+regex = "1"
+```
+
+and this to your crate root (if you're using Rust 2015):
+
+```rust
+extern crate regex;
+```
+
+Here's a simple example that matches a date in YYYY-MM-DD format and prints the
+year, month and day:
+
+```rust
+use regex::Regex;
+
+fn main() {
+    let re = Regex::new(r"(?x)
+(?P<year>\d{4})  # the year
+-
+(?P<month>\d{2}) # the month
+-
+(?P<day>\d{2})   # the day
+").unwrap();
+    let caps = re.captures("2010-03-14").unwrap();
+
+    assert_eq!("2010", &caps["year"]);
+    assert_eq!("03", &caps["month"]);
+    assert_eq!("14", &caps["day"]);
+}
+```
+
+If you have lots of dates in text that you'd like to iterate over, then it's
+easy to adapt the above example with an iterator:
+
+```rust
+use regex::Regex;
+
+const TO_SEARCH: &'static str = "
+On 2010-03-14, foo happened. On 2014-10-14, bar happened.
+";
+
+fn main() {
+    let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
+
+    for caps in re.captures_iter(TO_SEARCH) {
+        // Note that all of the unwraps are actually OK for this regex
+        // because the only way for the regex to match is if all of the
+        // capture groups match. This is not true in general though!
+        println!("year: {}, month: {}, day: {}",
+                 caps.get(1).unwrap().as_str(),
+                 caps.get(2).unwrap().as_str(),
+                 caps.get(3).unwrap().as_str());
+    }
+}
+```
+
+This example outputs:
+
+```text
+year: 2010, month: 03, day: 14
+year: 2014, month: 10, day: 14
+```
+
+### Usage: Avoid compiling the same regex in a loop
+
+It is an anti-pattern to compile the same regular expression in a loop since
+compilation is typically expensive. (It takes anywhere from a few microseconds
+to a few **milliseconds** depending on the size of the regex.) Not only is
+compilation itself expensive, but this also prevents optimizations that reuse
+allocations internally to the matching engines.
+
+In Rust, it can sometimes be a pain to pass regular expressions around if
+they're used from inside a helper function. Instead, we recommend using the
+[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that
+regular expressions are compiled exactly once.
+
+For example:
+
+```rust,ignore
+use regex::Regex;
+
+fn some_helper_function(text: &str) -> bool {
+    lazy_static! {
+        static ref RE: Regex = Regex::new("...").unwrap();
+    }
+    RE.is_match(text)
+}
+```
+
+Specifically, in this example, the regex will be compiled when it is used for
+the first time. On subsequent uses, it will reuse the previous compilation.
+
+### Usage: match regular expressions on `&[u8]`
+
+The main API of this crate (`regex::Regex`) requires the caller to pass a
+`&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which
+means the main API can't be used for searching arbitrary bytes.
+
+To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API
+is identical to the main API, except that it takes an `&[u8]` to search
+on instead of an `&str`. By default, `.` will match any *byte* using
+`regex::bytes::Regex`, while `.` will match any *UTF-8 encoded Unicode scalar
+value* using the main API.
+
+This example shows how to find all null-terminated strings in a slice of bytes:
+
+```rust
+use regex::bytes::Regex;
+
+let re = Regex::new(r"(?P<cstr>[^\x00]+)\x00").unwrap();
+let text = b"foo\x00bar\x00baz\x00";
+
+// Extract all of the strings without the null terminator from each match.
+// The unwrap is OK here since a match requires the `cstr` capture to match.
+let cstrs: Vec<&[u8]> =
+    re.captures_iter(text)
+      .map(|c| c.name("cstr").unwrap().as_bytes())
+      .collect();
+assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs);
+```
+
+Notice here that the `[^\x00]+` will match any *byte* except for `NUL`. When
+using the main API, `[^\x00]+` would instead match any valid UTF-8 sequence
+except for `NUL`.
+
+### Usage: match multiple regular expressions simultaneously
+
+This demonstrates how to use a `RegexSet` to match multiple (possibly
+overlapping) regular expressions in a single scan of the search text:
+
+```rust
+use regex::RegexSet;
+
+let set = RegexSet::new(&[
+    r"\w+",
+    r"\d+",
+    r"\pL+",
+    r"foo",
+    r"bar",
+    r"barfoo",
+    r"foobar",
+]).unwrap();
+
+// Iterate over and collect all of the matches.
+let matches: Vec<_> = set.matches("foobar").into_iter().collect();
+assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+
+// You can also test whether a particular regex matched:
+let matches = set.matches("foobar");
+assert!(!matches.matched(5));
+assert!(matches.matched(6));
+```
+
+### Usage: enable SIMD optimizations
+
+SIMD optimizations are enabled automatically on Rust stable 1.27 and newer.
+For nightly versions of Rust, this requires a recent version with the SIMD
+features stabilized.
+
+
+### Usage: a regular expression parser
+
+This repository contains a crate that provides a well tested regular expression
+parser, abstract syntax and a high-level intermediate representation for
+convenient analysis. It provides no facilities for compilation or execution.
+This may be useful if you're implementing your own regex engine or otherwise
+need to do analysis on the syntax of a regular expression. It is otherwise not
+recommended for general use.
+
+[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax)
+
+
+### Crate features
+
+This crate comes with several features that permit tweaking the trade off
+between binary size, compilation time and runtime performance. Users of this
+crate can selectively disable Unicode tables, or disable any of a variety of
+optimizations that this crate performs.
+
+When all of these features are disabled, runtime match performance may be much
+worse, but if you're matching on short strings, or if high performance isn't
+necessary, then such a configuration is perfectly serviceable. To disable
+all such features, use the following `Cargo.toml` dependency configuration:
+
+```toml
+[dependencies.regex]
+version = "1.3"
+default-features = false
+# regex currently requires the standard library, so you must re-enable it.
+features = ["std"]
+```
+
+This will reduce the dependency tree of `regex` down to a single crate
+(`regex-syntax`).
+
+The full set of features one can disable are
+[in the "Crate features" section of the documentation](https://docs.rs/regex/*/#crate-features).
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version is `1.28.0`.
+
+The current **tentative** policy is that the minimum Rust version required
+to use this crate can be increased in minor version updates. For example, if
+regex 1.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will
+also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require a
+newer minimum version of Rust.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust.
+
+
+### License
+
+This project is licensed under either of
+
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+   http://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+   http://opensource.org/licenses/MIT)
+
+at your option.
+
+The data in `regex-syntax/src/unicode_tables/` is licensed under the Unicode
+License Agreement
+([LICENSE-UNICODE](http://www.unicode.org/copyright.html#License)).
diff --git a/UNICODE.md b/UNICODE.md
new file mode 100644
index 0000000..18fa9b1
--- /dev/null
+++ b/UNICODE.md
@@ -0,0 +1,259 @@
+# Unicode conformance
+
+This document describes the regex crate's conformance to Unicode's
+[UTS#18](http://unicode.org/reports/tr18/)
+report, which lays out 3 levels of support: Basic, Extended and Tailored.
+
+Full support for Level 1 ("Basic Unicode Support") is provided with two
+exceptions:
+
+1. Line boundaries are not Unicode aware. Namely, only the `\n`
+   (`END OF LINE`) character is recognized as a line boundary.
+2. The compatibility properties specified by
+   [RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+   are ASCII-only definitions.
+
+Little to no support is provided for either Level 2 or Level 3. For the most
+part, this is because the features are either complex/hard to implement, or at
+the very least, very difficult to implement without sacrificing performance.
+For example, tackling canonical equivalence such that matching worked as one
+would expect regardless of normalization form would be a significant
+undertaking. This is at least partially a result of the fact that this regex
+engine is based on finite automata, which admits less of the flexibility
+normally associated with backtracking implementations.
+
+
+## RL1.1 Hex Notation
+
+[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation)
+
+Hex Notation refers to the ability to specify a Unicode code point in a regular
+expression via its hexadecimal code point representation. This is useful in
+environments that have poor Unicode font rendering or if you need to express a
+code point that is not normally displayable. All forms of hexadecimal notation
+are supported:
+
+    \x7F        hex character code (exactly two digits)
+    \x{10FFFF}  any hex character code corresponding to a Unicode code point
+    \u007F      hex character code (exactly four digits)
+    \u{7F}      any hex character code corresponding to a Unicode code point
+    \U0000007F  hex character code (exactly eight digits)
+    \U{7F}      any hex character code corresponding to a Unicode code point
+
+Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways
+of expressing hexadecimal code points. Any number of digits can be written
+within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all
+fixed-width variants of the same idea.
+
+Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is
+banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode
+mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint
+U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches
+the literal byte `\xFF`.
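+
+For example, a small sketch using `regex::bytes::Regex` (needed here because a
+lone `\xFF` byte is not valid UTF-8):
+
+    use regex::bytes::Regex;
+
+    fn main() {
+        // With Unicode mode enabled, `\xFF` is the code point U+00FF, which
+        // is encoded as the bytes `\xC3\xBF` in UTF-8.
+        assert!(Regex::new(r"\xFF").unwrap().is_match("\u{FF}".as_bytes()));
+        // With Unicode mode disabled, `\xFF` is the single byte `\xFF`.
+        assert!(Regex::new(r"(?-u)\xFF").unwrap().is_match(b"\xFF"));
+    }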
+
+
+## RL1.2 Properties
+
+[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories)
+
+Full support for Unicode property syntax is provided. Unicode properties
+provide a convenient way to construct character classes of groups of code
+points specified by Unicode. The regex crate does not provide exhaustive
+support, but covers a useful subset. In particular:
+
+* [General categories](http://unicode.org/reports/tr18/#General_Category_Property)
+* [Scripts and Script Extensions](http://unicode.org/reports/tr18/#Script_Property)
+* [Age](http://unicode.org/reports/tr18/#Age)
+* A smattering of boolean properties, including all of those specified by
+  [RL1.2](http://unicode.org/reports/tr18/#RL1.2) explicitly.
+
+In all cases, property name and value abbreviations are supported, and all
+names/values are matched loosely without regard for case, whitespace or
+underscores. Property name aliases can be found in Unicode's
+[`PropertyAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
+file, while property value aliases can be found in Unicode's
+[`PropertyValueAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
+file.
+
+The syntax supported is also consistent with the UTS#18 recommendation:
+
+* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow:
+  `\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`,
+  `\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and
+  `Script_Extensions` (or `scx` for short).
+* `\p{age:3.2}` selects all code points in Unicode 3.2.
+* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated
+  via `\p{alpha}` (for example).
+* Single letter variants for properties with single letter abbreviations.
+  For example, `\p{Letter}` can be equivalently written as `\pL`.
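+
+For example, a quick sketch exercising a couple of the equivalent spellings
+listed above:
+
+    use regex::Regex;
+
+    fn main() {
+        // `\p{Greek}` and `\p{sc:Greek}` denote the same script class.
+        assert!(Regex::new(r"\p{Greek}+").unwrap().is_match("αβγ"));
+        assert!(Regex::new(r"\p{sc:Greek}+").unwrap().is_match("αβγ"));
+        // Single letter properties also have a one-letter form.
+        assert!(Regex::new(r"\pL").unwrap().is_match("a"));
+    }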
+
+The following is a list of all properties supported by the regex crate (starred
+properties correspond to properties required by RL1.2):
+
+* `General_Category` \* (including `Any`, `ASCII` and `Assigned`)
+* `Script` \*
+* `Script_Extensions` \*
+* `Age`
+* `ASCII_Hex_Digit`
+* `Alphabetic` \*
+* `Bidi_Control`
+* `Case_Ignorable`
+* `Cased`
+* `Changes_When_Casefolded`
+* `Changes_When_Casemapped`
+* `Changes_When_Lowercased`
+* `Changes_When_Titlecased`
+* `Changes_When_Uppercased`
+* `Dash`
+* `Default_Ignorable_Code_Point` \*
+* `Deprecated`
+* `Diacritic`
+* `Emoji`
+* `Emoji_Presentation`
+* `Emoji_Modifier`
+* `Emoji_Modifier_Base`
+* `Emoji_Component`
+* `Extended_Pictographic`
+* `Extender`
+* `Grapheme_Base`
+* `Grapheme_Cluster_Break`
+* `Grapheme_Extend`
+* `Hex_Digit`
+* `IDS_Binary_Operator`
+* `IDS_Trinary_Operator`
+* `ID_Continue`
+* `ID_Start`
+* `Join_Control`
+* `Logical_Order_Exception`
+* `Lowercase` \*
+* `Math`
+* `Noncharacter_Code_Point` \*
+* `Pattern_Syntax`
+* `Pattern_White_Space`
+* `Prepended_Concatenation_Mark`
+* `Quotation_Mark`
+* `Radical`
+* `Regional_Indicator`
+* `Sentence_Break`
+* `Sentence_Terminal`
+* `Soft_Dotted`
+* `Terminal_Punctuation`
+* `Unified_Ideograph`
+* `Uppercase` \*
+* `Variation_Selector`
+* `White_Space` \*
+* `Word_Break`
+* `XID_Continue`
+* `XID_Start`
+
+
+## RL1.2a Compatibility Properties
+
+[UTS#18 RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+
+The regex crate only provides ASCII definitions of the
+[compatibility properties documented in UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties)
+(sans the `\X` class, for matching grapheme clusters, which isn't provided
+at all). This is because it seems to be consistent with most other regular
+expression engines, and in particular, because these are often referred to as
+"ASCII" or "POSIX" character classes.
+
+Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware.
+Their traditional ASCII definition can be used by disabling Unicode. That is,
+`[[:word:]]` and `(?-u)\w` are equivalent.
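+
+A quick sketch of that equivalence:
+
+    use regex::Regex;
+
+    fn main() {
+        // `\w` is Unicode aware, so it matches a Greek letter...
+        assert!(Regex::new(r"\w").unwrap().is_match("δ"));
+        // ...while the ASCII-only forms do not.
+        assert!(!Regex::new(r"[[:word:]]").unwrap().is_match("δ"));
+        assert!(!Regex::new(r"(?-u)\w").unwrap().is_match("δ"));
+    }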
+
+
+## RL1.3 Subtraction and Intersection
+
+[UTS#18 RL1.3](http://unicode.org/reports/tr18/#Subtraction_and_Intersection)
+
+The regex crate provides full support for nested character classes, along with
+union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
+operations on arbitrary character classes.
+
+For example, to match all non-ASCII letters, you could use either
+`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]`
+(intersecting the negation).
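+
+A sketch of the first form:
+
+    use regex::Regex;
+
+    fn main() {
+        // One or more letters that are not ASCII.
+        let re = Regex::new(r"[\p{Letter}--\p{Ascii}]+").unwrap();
+        assert!(re.is_match("héllo"));
+        assert!(!re.is_match("hello"));
+    }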
+
+
+## RL1.4 Simple Word Boundaries
+
+[UTS#18 RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+
+The regex crate provides basic Unicode aware word boundary assertions. A word
+boundary assertion can be written as `\b`, or `\B` as its negation. A word
+boundary assertion corresponds to a zero-width match, where its adjacent
+characters correspond to word and non-word, or non-word and word characters.
+
+Conformance in this case chooses to define word character in the same way that
+the `\w` character class is defined: a code point that is a member of one of
+the following classes:
+
+* `\p{Alphabetic}`
+* `\p{Join_Control}`
+* `\p{gc:Mark}`
+* `\p{gc:Decimal_Number}`
+* `\p{gc:Connector_Punctuation}`
+
+In particular, this differs slightly from the
+[prescription given in RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+but is permissible according to
+[UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
+one another.
+
+Finally, Unicode word boundaries can be disabled, which will cause ASCII word
+boundaries to be used instead. That is, `\b` is a Unicode word boundary while
+`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial
+if performance is important, since the implementation of Unicode word
+boundaries is currently sub-optimal on non-ASCII text.
+
+
+## RL1.5 Simple Loose Matches
+
+[UTS#18 RL1.5](http://unicode.org/reports/tr18/#Simple_Loose_Matches)
+
+The regex crate provides full support for case insensitive matching in
+accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
+"simple" mapping was chosen because of a key convenient property: every
+"simple" mapping is a mapping from exactly one code point to exactly one other
+code point. This makes case insensitive matching of character classes, for
+example, straight-forward to implement.
+
+When case insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`),
+then all character classes are case folded as well.
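+
+For example:
+
+    use regex::Regex;
+
+    fn main() {
+        // `(?i)[a]` behaves like `a|A`.
+        assert!(Regex::new(r"(?i)[a]").unwrap().is_match("A"));
+        // Simple case folding is Unicode aware: `δ` and `Δ` fold together.
+        assert!(Regex::new(r"(?i)δ").unwrap().is_match("Δ"));
+    }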
+
+
+## RL1.6 Line Boundaries
+
+[UTS#18 RL1.6](http://unicode.org/reports/tr18/#Line_Boundaries)
+
+The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
+character as a line boundary. This choice was made mostly for implementation
+convenience, and to avoid performance cliffs that Unicode word boundaries are
+subject to.
+
+Ideally, it would be nice to at least support `\r\n` as a line boundary as
+well, and in theory, this could be done efficiently.
+
+
+## RL1.7 Code Points
+
+[UTS#18 RL1.7](http://unicode.org/reports/tr18/#Supplementary_Characters)
+
+The regex crate provides full support for Unicode code point matching. Namely,
+the fundamental atom of any match is always a single code point.
+
+Given Rust's strong ties to UTF-8, the following guarantees are also provided:
+
+* All matches are reported on valid UTF-8 code unit boundaries. That is, any
+  match range returned by the public regex API is guaranteed to successfully
+  slice the string that was searched.
+* By consequence of the above, it is impossible to match surrogate code points.
+  No support for UTF-16 is provided, so this is never necessary.
+
+Note that when Unicode mode is disabled, the fundamental atom of matching is
+no longer a code point but a single byte. When Unicode mode is disabled, many
+Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid
+regex, but `\pL(?-u)\xFF` (which matches any Unicode `Letter` followed by the
+literal byte `\xFF`) is.
diff --git a/examples/regexdna-input.txt b/examples/regexdna-input.txt
new file mode 100644
index 0000000..fb23263
--- /dev/null
+++ b/examples/regexdna-input.txt
@@ -0,0 +1,1671 @@
+>ONE Homo sapiens alu
+GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
+AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
+CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
+GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
+GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
+TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
+AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
+GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
+AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
+GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
+CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
+AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
+TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
+TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
+GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
+TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
+CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
+CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
+TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
+CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
+AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
+GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
+TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
+GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
+GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
+ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
+TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
+CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
+CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
+GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC
+CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT
+GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC
+GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA
+GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA
+GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA
+GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG
+AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT
+CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA
+GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA
+AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC
+GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT
+ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG
+GAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATC
+GCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGC
+GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG
+TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA
+AAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAG
+GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACT
+CCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCC
+TGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAG
+ACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGC
+GTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGA
+ACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGA
+CAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCA
+CTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCA
+ACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCG
+CCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG
+AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTC
+CGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCG
+AGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACC
+CCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAG
+CTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAG
+CCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGG
+CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATC
+ACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAA
+AAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGC
+TGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC
+ACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGG
+CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGG
+AGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT
+AGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAA
+TCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGC
+CTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAA
+TCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAG
+CCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGT
+GGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCG
+GGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAG
+CGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG
+GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATG
+GTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGT
+AATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT
+GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCT
+CAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCG
+GGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTC
+TCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACT
+CGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAG
+ATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGG
+CGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTG
+AGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATA
+CAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGG
+CAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGC
+ACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCAC
+GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC
+GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCG
+GGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT
+TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGG
+CGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCA
+GCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGG
+CCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGC
+GCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGG
+CGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA
+CTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGG
+CCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAA
+ACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCC
+CAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGT
+GAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAA
+AGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGG
+ATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTAC
+TAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGA
+GGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGC
+GCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGG
+TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC
+AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAA
+ATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGA
+GAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC
+AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTG
+TAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGAC
+CAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGT
+GGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC
+CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACA
+GAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT
+TTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAAC
+ATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCC
+TGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAG
+GTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCG
+TCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAG
+GCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCC
+GTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCT
+ACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCC
+GAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCC
+GGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCAC
+CTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAA
+ATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTG
+AGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCAC
+TGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCT
+CACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAG
+TTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAG
+CCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC
+GCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCT
+GGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATC
+CCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCC
+TGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGG
+CGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG
+AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCG
+AGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG
+AGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGT
+GAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAA
+TCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGC
+AGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCA
+AAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGG
+CGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTC
+TACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCG
+GGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGAT
+CGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCG
+CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAG
+GTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACA
+AAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA
+GGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCAC
+TCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGC
+CTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA
+GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG
+CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTG
+AACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCG
+ACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGC
+ACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCC
+AACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGC
+GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCG
+GAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT
+CCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCC
+GAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAAC
+CCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA
+GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGA
+GCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAG
+GCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGAT
+CACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTA
+AAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGG
+CTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGC
+CACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTG
+GCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAG
+GAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAT
+TAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGA
+ATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAG
+CCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTA
+ATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG
+TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC
+GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGA
+GCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT
+GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT
+GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG
+TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGT
+TGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC
+TCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC
+GGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGT
+CTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTAC
+TCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGA
+GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGG
+GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCT
+GAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT
+ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAG
+GCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG
+CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCA
+CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTT
+CGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC
+GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGC
+TTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGG
+GCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCC
+AGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTG
+GCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCG
+CGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAG
+GCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAG
+ACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG
+GCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGA
+AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATC
+CCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAG
+TGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAA
+AAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCG
+GATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTA
+CTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGG
+AGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCG
+CGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCG
+GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT
+CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA
+AATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGG
+AGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTC
+CAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCT
+GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA
+CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG
+TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAA
+CCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGAC
+AGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAC
+TTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAA
+CATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGC
+CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGA
+GGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCC
+GTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGA
+GGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCC
+CGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGC
+TACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGC
+CGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGC
+CGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCA
+CCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA
+AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCT
+GAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCA
+CTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGC
+TCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGA
+GTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTA
+GCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAAT
+CGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCC
+TGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAAT
+CCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGC
+CTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTG
+GCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGG
+GAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGC
+GAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG
+GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGG
+TGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTA
+ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTG
+CAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC
+AAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGG
+GCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCT
+CTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTC
+GGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGA
+TCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGC
+GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGA
+GGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC
+AAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGC
+AGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCA
+CTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACG
+CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCG
+AGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGG
+GCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTT
+GAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGC
+GACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAG
+CACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGC
+CAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCG
+CGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC
+GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGAC
+TCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC
+CGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAA
+CCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCC
+AGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTG
+AGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA
+GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
+AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
+CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
+GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
+GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
+TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
+AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
+GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
+AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
+GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
+CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
+AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
+TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
+TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
+GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
+TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
+CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
+CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
+TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
+CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
+AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
+GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
+TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
+GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
+GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
+ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
+TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
+CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
+CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
+GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC
+CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT
+GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC
+GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA
+GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA
+GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA
+GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG
+AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT
+CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA
+GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA
+AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC
+GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT
+ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG
+GAGGCTGAGGCAGGAGAATC
+>TWO IUB ambiguity codes
+cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
+tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
+NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
+cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
+gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
+HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
+tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
+tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
+acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
+tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
+gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
+accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
+RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
+tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
+cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
+ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
+actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
+YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
+KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
+aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
+aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
+gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
+tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
+tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
+ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
+ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
+BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
+aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
+tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
+cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
+aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
+tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
+aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
+gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
+ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
+taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
+ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
+gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
+gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
+tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
+tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
+taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
+cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
+aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
+cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
+ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
+attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
+ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
+attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
+tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
+aagacYRcaggattHaYgtKtaatgcVcaataMYacccatatcacgWDBtgaatcBaata
+cKcttRaRtgatgaBDacggtaattaaYtataStgVHDtDctgactcaaatKtacaatgc
+gYatBtRaDatHaactgtttatatDttttaaaKVccYcaaccNcBcgHaaVcattHctcg
+attaaatBtatgcaaaaatYMctSactHatacgaWacattacMBgHttcgaatVaaaaca
+BatatVtctgaaaaWtctRacgBMaatSgRgtgtcgactatcRtattaScctaStagKga
+DcWgtYtDDWKRgRtHatRtggtcgaHgggcgtattaMgtcagccaBggWVcWctVaaat
+tcgNaatcKWagcNaHtgaaaSaaagctcYctttRVtaaaatNtataaccKtaRgtttaM
+tgtKaBtRtNaggaSattHatatWactcagtgtactaKctatttgRYYatKatgtccgtR
+tttttatttaatatVgKtttgtatgtNtataRatWYNgtRtHggtaaKaYtKSDcatcKg
+taaYatcSRctaVtSMWtVtRWHatttagataDtVggacagVcgKWagBgatBtaaagNc
+aRtagcataBggactaacacRctKgttaatcctHgDgttKHHagttgttaatgHBtatHc
+DaagtVaBaRccctVgtgDtacRHSctaagagcggWYaBtSaKtHBtaaactYacgNKBa
+VYgtaacttagtVttcttaatgtBtatMtMtttaattaatBWccatRtttcatagVgMMt
+agctStKctaMactacDNYgKYHgaWcgaHgagattacVgtttgtRaSttaWaVgataat
+gtgtYtaStattattMtNgWtgttKaccaatagNYttattcgtatHcWtctaaaNVYKKt
+tWtggcDtcgaagtNcagatacgcattaagaccWctgcagcttggNSgaNcHggatgtVt
+catNtRaaBNcHVagagaaBtaaSggDaatWaatRccaVgggStctDaacataKttKatt
+tggacYtattcSatcttagcaatgaVBMcttDattctYaaRgatgcattttNgVHtKcYR
+aatRKctgtaaacRatVSagctgtWacBtKVatctgttttKcgtctaaDcaagtatcSat
+aWVgcKKataWaYttcccSaatgaaaacccWgcRctWatNcWtBRttYaattataaNgac
+acaatagtttVNtataNaYtaatRaVWKtBatKagtaatataDaNaaaaataMtaagaaS
+tccBcaatNgaataWtHaNactgtcDtRcYaaVaaaaaDgtttRatctatgHtgttKtga
+aNSgatactttcgagWaaatctKaaDaRttgtggKKagcDgataaattgSaacWaVtaNM
+acKtcaDaaatttctRaaVcagNacaScRBatatctRatcctaNatWgRtcDcSaWSgtt
+RtKaRtMtKaatgttBHcYaaBtgatSgaSWaScMgatNtctcctatttctYtatMatMt
+RRtSaattaMtagaaaaStcgVgRttSVaScagtgDtttatcatcatacRcatatDctta
+tcatVRtttataaHtattcYtcaaaatactttgVctagtaaYttagatagtSYacKaaac
+gaaKtaaatagataatSatatgaaatSgKtaatVtttatcctgKHaatHattagaaccgt
+YaaHactRcggSBNgtgctaaBagBttgtRttaaattYtVRaaaattgtaatVatttctc
+ttcatgBcVgtgKgaHaaatattYatagWacNctgaaMcgaattStagWaSgtaaKagtt
+ttaagaDgatKcctgtaHtcatggKttVDatcaaggtYcgccagNgtgcVttttagagat
+gctaccacggggtNttttaSHaNtatNcctcatSaaVgtactgBHtagcaYggYVKNgta
+KBcRttgaWatgaatVtagtcgattYgatgtaatttacDacSctgctaaaStttaWMagD
+aaatcaVYctccgggcgaVtaaWtStaKMgDtttcaaMtVgBaatccagNaaatcYRMBg
+gttWtaaScKttMWtYataRaDBMaDataatHBcacDaaKDactaMgagttDattaHatH
+taYatDtattDcRNStgaatattSDttggtattaaNSYacttcDMgYgBatWtaMagact
+VWttctttgYMaYaacRgHWaattgRtaagcattctMKVStatactacHVtatgatcBtV
+NataaBttYtSttacKgggWgYDtgaVtYgatDaacattYgatggtRDaVDttNactaSa
+MtgNttaacaaSaBStcDctaccacagacgcaHatMataWKYtaYattMcaMtgSttDag
+cHacgatcaHttYaKHggagttccgatYcaatgatRaVRcaagatcagtatggScctata
+ttaNtagcgacgtgKaaWaactSgagtMYtcttccaKtStaacggMtaagNttattatcg
+tctaRcactctctDtaacWYtgaYaSaagaWtNtatttRacatgNaatgttattgWDDcN
+aHcctgaaHacSgaataaRaataMHttatMtgaSDSKatatHHaNtacagtccaYatWtc
+actaactatKDacSaStcggataHgYatagKtaatKagStaNgtatactatggRHacttg
+tattatgtDVagDVaRctacMYattDgtttYgtctatggtKaRSttRccRtaaccttaga
+gRatagSaaMaacgcaNtatgaaatcaRaagataatagatactcHaaYKBctccaagaRa
+BaStNagataggcgaatgaMtagaatgtcaKttaaatgtaWcaBttaatRcggtgNcaca
+aKtttScRtWtgcatagtttWYaagBttDKgcctttatMggNttattBtctagVtacata
+aaYttacacaaRttcYtWttgHcaYYtaMgBaBatctNgcDtNttacgacDcgataaSat
+YaSttWtcctatKaatgcagHaVaacgctgcatDtgttaSataaaaYSNttatagtaNYt
+aDaaaNtggggacttaBggcHgcgtNtaaMcctggtVtaKcgNacNtatVaSWctWtgaW
+cggNaBagctctgaYataMgaagatBSttctatacttgtgtKtaattttRagtDtacata
+tatatgatNHVgBMtKtaKaNttDHaagatactHaccHtcatttaaagttVaMcNgHata
+tKtaNtgYMccttatcaaNagctggacStttcNtggcaVtattactHaSttatgNMVatt
+MMDtMactattattgWMSgtHBttStStgatatRaDaagattttctatMtaaaaaggtac
+taaVttaSacNaatactgMttgacHaHRttgMacaaaatagttaatatWKRgacDgaRta
+tatttattatcYttaWtgtBRtWatgHaaattHataagtVaDtWaVaWtgStcgtMSgaS
+RgMKtaaataVacataatgtaSaatttagtcgaaHtaKaatgcacatcggRaggSKctDc
+agtcSttcccStYtccRtctctYtcaaKcgagtaMttttcRaYDttgttatctaatcata
+NctctgctatcaMatactataggDaHaaSttMtaDtcNatataattctMcStaaBYtaNa
+gatgtaatHagagSttgWHVcttatKaYgDctcttggtgttMcRaVgSgggtagacaata
+aDtaattSaDaNaHaBctattgNtaccaaRgaVtKNtaaYggHtaKKgHcatctWtctDt
+ttctttggSDtNtaStagttataaacaattgcaBaBWggHgcaaaBtYgctaatgaaatW
+cDcttHtcMtWWattBHatcatcaaatctKMagtDNatttWaBtHaaaNgMttaaStagt
+tctctaatDtcRVaYttgttMtRtgtcaSaaYVgSWDRtaatagctcagDgcWWaaaBaa
+RaBctgVgggNgDWStNaNBKcBctaaKtttDcttBaaggBttgaccatgaaaNgttttt
+tttatctatgttataccaaDRaaSagtaVtDtcaWatBtacattaWacttaSgtattggD
+gKaaatScaattacgWcagKHaaccaYcRcaRttaDttRtttHgaHVggcttBaRgtccc
+tDatKaVtKtcRgYtaKttacgtatBtStaagcaattaagaRgBagSaattccSWYttta
+ttVaataNctgHgttaaNBgcVYgtRtcccagWNaaaacaDNaBcaaaaRVtcWMgBagM
+tttattacgDacttBtactatcattggaaatVccggttRttcatagttVYcatYaSHaHc
+ttaaagcNWaHataaaRWtctVtRYtagHtaaaYMataHYtNBctNtKaatattStgaMc
+BtRgctaKtgcScSttDgYatcVtggaaKtaagatWccHccgKYctaNNctacaWctttt
+gcRtgtVcgaKttcMRHgctaHtVaataaDtatgKDcttatBtDttggNtacttttMtga
+acRattaaNagaactcaaaBBVtcDtcgaStaDctgaaaSgttMaDtcgttcaccaaaag
+gWtcKcgSMtcDtatgtttStaaBtatagDcatYatWtaaaBacaKgcaDatgRggaaYc
+taRtccagattDaWtttggacBaVcHtHtaacDacYgtaatataMagaatgHMatcttat
+acgtatttttatattacHactgttataMgStYaattYaccaattgagtcaaattaYtgta
+tcatgMcaDcgggtcttDtKgcatgWRtataatatRacacNRBttcHtBgcRttgtgcgt
+catacMtttBctatctBaatcattMttMYgattaaVYatgDaatVagtattDacaacDMa
+tcMtHcccataagatgBggaccattVWtRtSacatgctcaaggggYtttDtaaNgNtaaB
+atggaatgtctRtaBgBtcNYatatNRtagaacMgagSaSDDSaDcctRagtVWSHtVSR
+ggaacaBVaccgtttaStagaacaMtactccagtttVctaaRaaHttNcttagcaattta
+ttaatRtaaaatctaacDaBttggSagagctacHtaaRWgattcaaBtctRtSHaNtgta
+cattVcaHaNaagtataccacaWtaRtaaVKgMYaWgttaKggKMtKcgWatcaDatYtK
+SttgtacgaccNctSaattcDcatcttcaaaDKttacHtggttHggRRaRcaWacaMtBW
+VHSHgaaMcKattgtaRWttScNattBBatYtaNRgcggaagacHSaattRtttcYgacc
+BRccMacccKgatgaacttcgDgHcaaaaaRtatatDtatYVtttttHgSHaSaatagct
+NYtaHYaVYttattNtttgaaaYtaKttWtctaNtgagaaaNctNDctaaHgttagDcRt
+tatagccBaacgcaRBtRctRtggtaMYYttWtgataatcgaataattattataVaaaaa
+ttacNRVYcaaMacNatRttcKatMctgaagactaattataaYgcKcaSYaatMNctcaa
+cgtgatttttBacNtgatDccaattattKWWcattttatatatgatBcDtaaaagttgaa
+VtaHtaHHtBtataRBgtgDtaataMttRtDgDcttattNtggtctatctaaBcatctaR
+atgNacWtaatgaagtcMNaacNgHttatactaWgcNtaStaRgttaaHacccgaYStac
+aaaatWggaYaWgaattattcMaactcBKaaaRVNcaNRDcYcgaBctKaacaaaaaSgc
+tccYBBHYaVagaatagaaaacagYtctVccaMtcgtttVatcaatttDRtgWctagtac
+RttMctgtDctttcKtWttttataaatgVttgBKtgtKWDaWagMtaaagaaattDVtag
+gttacatcatttatgtcgMHaVcttaBtVRtcgtaYgBRHatttHgaBcKaYWaatcNSc
+tagtaaaaatttacaatcactSWacgtaatgKttWattagttttNaggtctcaagtcact
+attcttctaagKggaataMgtttcataagataaaaatagattatDgcBVHWgaBKttDgc
+atRHaagcaYcRaattattatgtMatatattgHDtcaDtcaaaHctStattaatHaccga
+cNattgatatattttgtgtDtRatagSacaMtcRtcattcccgacacSattgttKaWatt
+NHcaacttccgtttSRtgtctgDcgctcaaMagVtBctBMcMcWtgtaacgactctcttR
+ggRKSttgYtYatDccagttDgaKccacgVatWcataVaaagaataMgtgataaKYaaat
+cHDaacgataYctRtcYatcgcaMgtNttaBttttgatttaRtStgcaacaaaataccVg
+aaDgtVgDcStctatatttattaaaaRKDatagaaagaKaaYYcaYSgKStctccSttac
+agtcNactttDVttagaaagMHttRaNcSaRaMgBttattggtttaRMggatggcKDgWR
+tNaataataWKKacttcKWaaagNaBttaBatMHtccattaacttccccYtcBcYRtaga
+ttaagctaaYBDttaNtgaaaccHcaRMtKtaaHMcNBttaNaNcVcgVttWNtDaBatg
+ataaVtcWKcttRggWatcattgaRagHgaattNtatttctctattaattaatgaDaaMa
+tacgttgggcHaYVaaNaDDttHtcaaHtcVVDgBVagcMacgtgttaaBRNtatRtcag
+taagaggtttaagacaVaaggttaWatctccgtVtaDtcDatttccVatgtacNtttccg
+tHttatKgScBatgtVgHtYcWagcaKtaMYaaHgtaattaSaHcgcagtWNaatNccNN
+YcacgVaagaRacttctcattcccRtgtgtaattagcSttaaStWaMtctNNcSMacatt
+ataaactaDgtatWgtagtttaagaaaattgtagtNagtcaataaatttgatMMYactaa
+tatcggBWDtVcYttcDHtVttatacYaRgaMaacaStaatcRttttVtagaDtcacWat
+ttWtgaaaagaaagNRacDtttStVatBaDNtaactatatcBSMcccaSttccggaMatg
+attaaWatKMaBaBatttgataNctgttKtVaagtcagScgaaaDggaWgtgttttKtWt
+atttHaatgtagttcactaaKMagttSYBtKtaYgaactcagagRtatagtVtatcaaaW
+YagcgNtaDagtacNSaaYDgatBgtcgataacYDtaaactacagWDcYKaagtttatta
+gcatcgagttKcatDaattgattatDtcagRtWSKtcgNtMaaaaacaMttKcaWcaaSV
+MaaaccagMVtaMaDtMaHaBgaacataBBVtaatVYaNSWcSgNtDNaaKacacBttta
+tKtgtttcaaHaMctcagtaacgtcgYtactDcgcctaNgagagcYgatattttaaattt
+ccattttacatttDaaRctattttWctttacgtDatYtttcagacgcaaVttagtaaKaa
+aRtgVtccataBggacttatttgtttaWNtgttVWtaWNVDaattgtatttBaagcBtaa
+BttaaVatcHcaVgacattccNggtcgacKttaaaRtagRtctWagaYggtgMtataatM
+tgaaRttattttgWcttNtDRRgMDKacagaaaaggaaaRStcccagtYccVattaNaaK
+StNWtgacaVtagaagcttSaaDtcacaacgDYacWDYtgtttKatcVtgcMaDaSKStV
+cgtagaaWaKaagtttcHaHgMgMtctataagBtKaaaKKcactggagRRttaagaBaaN
+atVVcgRcKSttDaactagtSttSattgttgaaRYatggttVttaataaHttccaagDtg
+atNWtaagHtgcYtaactRgcaatgMgtgtRaatRaNaacHKtagactactggaatttcg
+ccataacgMctRgatgttaccctaHgtgWaYcactcacYaattcttaBtgacttaaacct
+gYgaWatgBttcttVttcgttWttMcNYgtaaaatctYgMgaaattacNgaHgaacDVVM
+tttggtHtctaaRgtacagacgHtVtaBMNBgattagcttaRcttacaHcRctgttcaaD
+BggttKaacatgKtttYataVaNattccgMcgcgtagtRaVVaattaKaatggttRgaMc
+agtatcWBttNtHagctaatctagaaNaaacaYBctatcgcVctBtgcaaagDgttVtga
+HtactSNYtaaNccatgtgDacgaVtDcgKaRtacDcttgctaagggcagMDagggtBWR
+tttSgccttttttaacgtcHctaVtVDtagatcaNMaVtcVacatHctDWNaataRgcgt
+aVHaggtaaaaSgtttMtattDgBtctgatSgtRagagYtctSaKWaataMgattRKtaa
+catttYcgtaacacattRWtBtcggtaaatMtaaacBatttctKagtcDtttgcBtKYYB
+aKttctVttgttaDtgattttcttccacttgSaaacggaaaNDaattcYNNaWcgaaYat
+tttMgcBtcatRtgtaaagatgaWtgaccaYBHgaatagataVVtHtttVgYBtMctaMt
+cctgaDcYttgtccaaaRNtacagcMctKaaaggatttacatgtttaaWSaYaKttBtag
+DacactagctMtttNaKtctttcNcSattNacttggaacaatDagtattRtgSHaataat
+gccVgacccgatactatccctgtRctttgagaSgatcatatcgDcagWaaHSgctYYWta
+tHttggttctttatVattatcgactaagtgtagcatVgtgHMtttgtttcgttaKattcM
+atttgtttWcaaStNatgtHcaaaDtaagBaKBtRgaBgDtSagtatMtaacYaatYtVc
+KatgtgcaacVaaaatactKcRgtaYtgtNgBBNcKtcttaccttKgaRaYcaNKtactt
+tgagSBtgtRagaNgcaaaNcacagtVtttHWatgttaNatBgtttaatNgVtctgaata
+tcaRtattcttttttttRaaKcRStctcggDgKagattaMaaaKtcaHacttaataataK
+taRgDtKVBttttcgtKaggHHcatgttagHggttNctcgtatKKagVagRaaaggaaBt
+NatttVKcRttaHctaHtcaaatgtaggHccaBataNaNaggttgcWaatctgatYcaaa
+HaatWtaVgaaBttagtaagaKKtaaaKtRHatMaDBtBctagcatWtatttgWttVaaa
+ScMNattRactttgtYtttaaaagtaagtMtaMaSttMBtatgaBtttaKtgaatgagYg
+tNNacMtcNRacMMHcttWtgtRtctttaacaacattattcYaMagBaacYttMatcttK
+cRMtgMNccattaRttNatHaHNaSaaHMacacaVaatacaKaSttHatattMtVatWga
+ttttttaYctttKttHgScWaacgHtttcaVaaMgaacagNatcgttaacaaaaagtaca
+HBNaattgttKtcttVttaaBtctgctacgBgcWtttcaggacacatMgacatcccagcg
+gMgaVKaBattgacttaatgacacacaaaaaatRKaaBctacgtRaDcgtagcVBaacDS
+BHaaaaSacatatacagacRNatcttNaaVtaaaataHattagtaaaaSWccgtatWatg
+gDttaactattgcccatcttHaSgYataBttBaactattBtcHtgatcaataSttaBtat
+KSHYttWggtcYtttBttaataccRgVatStaHaKagaatNtagRMNgtcttYaaSaact
+cagDSgagaaYtMttDtMRVgWKWtgMaKtKaDttttgactatacataatcNtatNaHat
+tVagacgYgatatatttttgtStWaaatctWaMgagaRttRatacgStgattcttaagaD
+taWccaaatRcagcagaaNKagtaaDggcgccBtYtagSBMtactaaataMataBSacRM
+gDgattMMgtcHtcaYDtRaDaacggttDaggcMtttatgttaNctaattaVacgaaMMt
+aatDccSgtattgaRtWWaccaccgagtactMcgVNgctDctaMScatagcgtcaactat
+acRacgHRttgctatttaatgaattataYKttgtaagWgtYttgcHgMtaMattWaWVta
+RgcttgYgttBHtYataSccStBtgtagMgtDtggcVaaSBaatagDttgBgtctttctc
+attttaNagtHKtaMWcYactVcgcgtatMVtttRacVagDaatcttgctBBcRDgcaac
+KttgatSKtYtagBMagaRtcgBattHcBWcaactgatttaatttWDccatttatcgagS
+KaWttataHactaHMttaatHtggaHtHagaatgtKtaaRactgtttMatacgatcaagD
+gatKaDctataMggtHDtggHacctttRtatcttYattttgacttgaaSaataaatYcgB
+aaaaccgNatVBttMacHaKaataagtatKgtcaagactcttaHttcggaattgttDtct
+aaccHttttWaaatgaaatataaaWattccYDtKtaaaacggtgaggWVtctattagtga
+ctattaagtMgtttaagcatttgSgaaatatccHaaggMaaaattttcWtatKctagDtY
+tMcctagagHcactttactatacaaacattaacttaHatcVMYattYgVgtMttaaRtga
+aataaDatcaHgtHHatKcDYaatcttMtNcgatYatgSaMaNtcttKcWataScKggta
+tcttacgcttWaaagNatgMgHtctttNtaacVtgttcMaaRatccggggactcMtttaY
+MtcWRgNctgNccKatcttgYDcMgattNYaRagatHaaHgKctcataRDttacatBatc
+cattgDWttatttaWgtcggagaaaaatacaatacSNtgggtttccttacSMaagBatta
+caMaNcactMttatgaRBacYcYtcaaaWtagctSaacttWgDMHgaggatgBVgcHaDt
+ggaactttggtcNatNgtaKaBcccaNtaagttBaacagtatacDYttcctNgWgcgSMc
+acatStctHatgRcNcgtacacaatRttMggaNKKggataaaSaYcMVcMgtaMaHtgat
+tYMatYcggtcttcctHtcDccgtgRatcattgcgccgatatMaaYaataaYSggatagc
+gcBtNtaaaScaKgttBgagVagttaKagagtatVaactaSacWactSaKatWccaKaaa
+atBKgaaKtDMattttgtaaatcRctMatcaaMagMttDgVatggMaaWgttcgaWatga
+aatttgRtYtattaWHKcRgctacatKttctaccaaHttRatctaYattaaWatVNccat
+NgagtcKttKataStRaatatattcctRWatDctVagttYDgSBaatYgttttgtVaatt
+taatagcagMatRaacttBctattgtMagagattaaactaMatVtHtaaatctRgaaaaa
+aaatttWacaacaYccYDSaattMatgaccKtaBKWBattgtcaagcHKaagttMMtaat
+ttcKcMagNaaKagattggMagaggtaatttYacatcWaaDgatMgKHacMacgcVaaca
+DtaDatatYggttBcgtatgWgaSatttgtagaHYRVacaRtctHaaRtatgaactaata
+tctSSBgggaaHMWtcaagatKgagtDaSatagttgattVRatNtctMtcSaagaSHaat
+aNataataRaaRgattctttaataaagWaRHcYgcatgtWRcttgaaggaMcaataBRaa
+ccagStaaacNtttcaatataYtaatatgHaDgcStcWttaacctaRgtYaRtataKtgM
+ttttatgactaaaatttacYatcccRWtttHRtattaaatgtttatatttgttYaatMca
+RcSVaaDatcgtaYMcatgtagacatgaaattgRtcaaYaaYtRBatKacttataccaNa
+aattVaBtctggacaagKaaYaaatatWtMtatcYaaVNtcgHaactBaagKcHgtctac
+aatWtaDtSgtaHcataHtactgataNctRgttMtDcDttatHtcgtacatcccaggStt
+aBgtcacacWtccNMcNatMVaVgtccDYStatMaccDatggYaRKaaagataRatttHK
+tSaaatDgataaacttaHgttgVBtcttVttHgDacgaKatgtatatNYataactctSat
+atatattgcHRRYttStggaactHgttttYtttaWtatMcttttctatctDtagVHYgMR
+BgtHttcctaatYRttKtaagatggaVRataKDctaMtKBNtMtHNtWtttYcVtattMc
+gRaacMcctNSctcatttaaagDcaHtYccSgatgcaatYaaaaDcttcgtaWtaattct
+cgttttScttggtaatctttYgtctaactKataHacctMctcttacHtKataacacagcN
+RatgKatttttSaaatRYcgDttaMRcgaaattactMtgcgtaagcgttatBtttttaat
+taagtNacatHgttcRgacKcBBtVgatKttcgaBaatactDRgtRtgaNacWtcacYtt
+aaKcgttctHaKttaNaMgWgWaggtctRgaKgWttSttBtDcNtgtttacaaatYcDRt
+gVtgcctattcNtctaaaDMNttttNtggctgagaVctDaacVtWccaagtaacacaNct
+gaScattccDHcVBatcgatgtMtaatBgHaatDctMYgagaatgYWKcctaatNaStHa
+aaKccgHgcgtYaaYtattgtStgtgcaaRtattaKatattagaWVtcaMtBagttatta
+gNaWHcVgcaattttDcMtgtaRHVYtHtctgtaaaaHVtMKacatcgNaatttMatatg
+ttgttactagWYtaRacgataKagYNKcattataNaRtgaacKaYgcaaYYacaNccHat
+MatDcNgtHttRaWttagaaDcaaaaaatagggtKDtStaDaRtaVtHWKNtgtattVct
+SVgRgataDaRaWataBgaagaaKtaataaYgDcaStaNgtaDaaggtattHaRaWMYaY
+aWtggttHYgagVtgtgcttttcaaDKcagVcgttagacNaaWtagtaataDttctggtt
+VcatcataaagtgKaaaNaMtaBBaattaatWaattgctHaVKaSgDaaVKaHtatatat
+HatcatSBagNgHtatcHYMHgttDgtaHtBttWatcgtttaRaattgStKgSKNWKatc
+agDtctcagatttctRtYtBatBgHHtKaWtgYBgacVVWaKtacKcDttKMaKaVcggt
+gttataagaataaHaatattagtataatMHgttYgaRttagtaRtcaaVatacggtcMcg
+agtaaRttacWgactKRYataaaagSattYaWgagatYagKagatgSaagKgttaatMgg
+tataatgttWYttatgagaaacctNVataatHcccKtDctcctaatactggctHggaSag
+gRtKHaWaattcgSatMatttagaggcYtctaMcgctcataSatatgRagacNaaDagga
+VBagaYttKtacNaKgtSYtagttggaWcatcWttaatctatgaVtcgtgtMtatcaYcg
+tRccaaYgDctgcMgtgtWgacWtgataacacgcgctBtgttaKtYDtatDcatcagKaV
+MctaatcttgVcaaRgcRMtDcgattaHttcaNatgaatMtactacVgtRgatggaWttt
+actaaKatgagSaaKggtaNtactVaYtaaKRagaacccacaMtaaMtKtatBcttgtaa
+WBtMctaataaVcDaaYtcRHBtcgttNtaaHatttBNgRStVDattBatVtaagttaYa
+tVattaagaBcacggtSgtVtatttaRattgatgtaHDKgcaatattKtggcctatgaWD
+KRYcggattgRctatNgatacaatMNttctgtcRBYRaaaHctNYattcHtaWcaattct
+BtMKtVgYataatMgYtcagcttMDataVtggRtKtgaatgccNcRttcaMtRgattaac
+attRcagcctHtWMtgtDRagaKaBtgDttYaaaaKatKgatctVaaYaacWcgcatagB
+VtaNtRtYRaggBaaBtgKgttacataagagcatgtRattccacttaccatRaaatgWgD
+aMHaYVgVtaSctatcgKaatatattaDgacccYagtgtaYNaaatKcagtBRgagtcca
+tgKgaaaccBgaagBtgSttWtacgatWHaYatcgatttRaaNRgcaNaKVacaNtDgat
+tgHVaatcDaagcgtatgcNttaDataatcSataaKcaataaHWataBtttatBtcaKtK
+tatagttaDgSaYctacaRatNtaWctSaatatttYaKaKtaccWtatcRagacttaYtt
+VcKgSDcgagaagatccHtaattctSttatggtKYgtMaHagVaBRatttctgtRgtcta
+tgggtaHKgtHacHtSYacgtacacHatacKaaBaVaccaDtatcSaataaHaagagaat
+ScagactataaRttagcaaVcaHataKgDacatWccccaagcaBgagWatctaYttgaaa
+tctVNcYtttWagHcgcgcDcVaaatgttKcHtNtcaatagtgtNRaactttttcaatgg
+WgBcgDtgVgtttctacMtaaataaaRggaaacWaHttaRtNtgctaaRRtVBctYtVta
+tDcattDtgaccYatagatYRKatNYKttNgcctagtaWtgaactaMVaacctgaStttc
+tgaKVtaaVaRKDttVtVctaDNtataaaDtccccaagtWtcgatcactDgYaBcatcct
+MtVtacDaaBtYtMaKNatNtcaNacgDatYcatcgcaRatWBgaacWttKttagYtaat
+tcggttgSWttttDWctttacYtatatWtcatDtMgtBttgRtVDggttaacYtacgtac
+atgaattgaaWcttMStaDgtatattgaDtcRBcattSgaaVBRgagccaaKtttcDgcg
+aSMtatgWattaKttWtgDBMaggBBttBaatWttRtgcNtHcgttttHtKtcWtagHSt
+aacagttgatatBtaWSaWggtaataaMttaKacDaatactcBttcaatatHttcBaaSa
+aatYggtaRtatNtHcaatcaHtagVtgtattataNggaMtcttHtNagctaaaggtaga
+YctMattNaMVNtcKtactBKcaHHcBttaSagaKacataYgctaKaYgttYcgacWVtt
+WtSagcaacatcccHaccKtcttaacgaKttcacKtNtacHtatatRtaaatacactaBt
+ttgaHaRttggttWtatYagcatYDatcggagagcWBataagRtacctataRKgtBgatg
+aDatataSttagBaHtaatNtaDWcWtgtaattacagKttcNtMagtattaNgtctcgtc
+ctcttBaHaKcKccgtRcaaYagSattaagtKataDatatatagtcDtaacaWHcaKttD
+gaaRcgtgYttgtcatatNtatttttatggccHtgDtYHtWgttatYaacaattcaWtat
+NgctcaaaSttRgctaatcaaatNatcgtttaBtNNVtgttataagcaaagattBacgtD
+atttNatttaaaDcBgtaSKgacgtagataatttcHMVNttgttBtDtgtaWKaaRMcKM
+tHtaVtagataWctccNNaSWtVaHatctcMgggDgtNHtDaDttatatVWttgttattt
+aacctttcacaaggaSaDcggttttttatatVtctgVtaacaStDVaKactaMtttaSNa
+gtgaaattaNacttSKctattcctctaSagKcaVttaagNaVcttaVaaRNaHaaHttat
+gtHttgtgatMccaggtaDcgaccgtWgtWMtttaHcRtattgScctatttKtaaccaag
+tYagaHgtWcHaatgccKNRtttagtMYSgaDatctgtgaWDtccMNcgHgcaaacNDaa
+aRaStDWtcaaaaHKtaNBctagBtgtattaactaattttVctagaatggcWSatMaccc
+ttHttaSgSgtgMRcatRVKtatctgaaaccDNatYgaaVHNgatMgHRtacttaaaRta
+tStRtDtatDttYatattHggaBcttHgcgattgaKcKtttcRataMtcgaVttWacatN
+catacctRataDDatVaWNcggttgaHtgtMacVtttaBHtgagVttMaataattatgtt
+cttagtttgtgcDtSatttgBtcaacHattaaBagVWcgcaSYttMgcttacYKtVtatc
+aYaKctgBatgcgggcYcaaaaacgNtctagKBtattatctttKtaVttatagtaYtRag
+NtaYataaVtgaatatcHgcaaRataHtacacatgtaNtgtcgYatWMatttgaactacR
+ctaWtWtatacaatctBatatgYtaagtatgtgtatSttactVatcttYtaBcKgRaSgg
+RaaaaatgcagtaaaWgtaRgcgataatcBaataccgtatttttccatcNHtatWYgatH
+SaaaDHttgctgtccHtggggcctaataatttttctatattYWtcattBtgBRcVttaVM
+RSgctaatMagtYtttaaaaatBRtcBttcaaVtaacagctccSaaSttKNtHtKYcagc
+agaaaccccRtttttaaDcDtaStatccaagcgctHtatcttaDRYgatDHtWcaaaBcW
+gKWHttHataagHacgMNKttMKHccaYcatMVaacgttaKgYcaVaaBtacgcaacttt
+MctaaHaatgtBatgagaSatgtatgSRgHgWaVWgataaatatttccKagVgataattW
+aHNcYggaaatgctHtKtaDtctaaagtMaatVDVactWtSaaWaaMtaHtaSKtcBRaN
+cttStggtBttacNagcatagRgtKtgcgaacaacBcgKaatgataagatgaaaattgta
+ctgcgggtccHHWHaaNacaBttNKtKtcaaBatatgctaHNgtKcDWgtttatNgVDHg
+accaacWctKaaggHttgaRgYaatHcaBacaatgagcaaattactgtaVaaYaDtagat
+tgagNKggtggtgKtWKaatacagDRtatRaMRtgattDggtcaaYRtatttNtagaDtc
+acaaSDctDtataatcgtactaHttatacaatYaacaaHttHatHtgcgatRRttNgcat
+SVtacWWgaaggagtatVMaVaaattScDDKNcaYBYaDatHgtctatBagcaacaagaa
+tgagaaRcataaKNaRtBDatcaaacgcattttttaaBtcSgtacaRggatgtMNaattg
+gatatWtgagtattaaaVctgcaYMtatgatttttYgaHtgtcttaagWBttHttgtctt
+attDtcgtatWtataataSgctaHagcDVcNtaatcaagtaBDaWaDgtttagYctaNcc
+DtaKtaHcttaataacccaRKtacaVaatNgcWRaMgaattatgaBaaagattVYaHMDc
+aDHtcRcgYtcttaaaWaaaVKgatacRtttRRKYgaatacaWVacVcRtatMacaBtac
+tggMataaattttHggNagSctacHgtBagcgtcgtgattNtttgatSaaggMttctttc
+ttNtYNagBtaaacaaatttMgaccttacataattgYtcgacBtVMctgStgMDtagtaR
+ctHtatgttcatatVRNWataDKatWcgaaaaagttaaaagcacgHNacgtaatctttMR
+tgacttttDacctataaacgaaatatgattagaactccSYtaBctttaataacWgaaaYa
+tagatgWttcatKtNgatttttcaagHtaYgaaRaDaagtaggagcttatVtagtctttc
+attaaaatcgKtattaRttacagVaDatgcatVgattgggtctttHVtagKaaRBtaHta
+aggccccaaaaKatggtttaMWgtBtaaacttcactttKHtcgatctccctaYaBacMgt
+cttBaBaNgcgaaacaatctagtHccHtKttcRtRVttccVctttcatacYagMVtMcag
+aMaaacaataBctgYtaatRaaagattaaccatVRatHtaRagcgcaBcgDttStttttc
+VtttaDtKgcaaWaaaaatSccMcVatgtKgtaKgcgatatgtagtSaaaDttatacaaa
+catYaRRcVRHctKtcgacKttaaVctaDaatgttMggRcWaacttttHaDaKaDaBctg
+taggcgtttaHBccatccattcNHtDaYtaataMttacggctNVaacDattgatatttta
+cVttSaattacaaRtataNDgacVtgaacataVRttttaDtcaaacataYDBtttaatBa
+DtttYDaDaMccMttNBttatatgagaaMgaNtattHccNataattcaHagtgaaggDga
+tgtatatatgYatgaStcataaBStWacgtcccataRMaaDattggttaaattcMKtctM
+acaBSactcggaatDDgatDgcWctaacaccgggaVcacWKVacggtaNatatacctMta
+tgatagtgcaKagggVaDtgtaacttggagtcKatatcgMcttRaMagcattaBRaStct
+YSggaHYtacaactMBaagDcaBDRaaacMYacaHaattagcattaaaHgcgctaaggSc
+cKtgaaKtNaBtatDDcKBSaVtgatVYaagVtctSgMctacgttaacWaaattctSgtD
+actaaStaaattgcagBBRVctaatatacctNttMcRggctttMttagacRaHcaBaacV
+KgaataHttttMgYgattcYaNRgttMgcVaaacaVVcDHaatttgKtMYgtatBtVVct
+WgVtatHtacaaHttcacgatagcagtaaNattBatatatttcVgaDagcggttMaagtc
+ScHagaaatgcYNggcgtttttMtStggtRatctacttaaatVVtBacttHNttttaRca
+aatcacagHgagagtMgatcSWaNRacagDtatactaaDKaSRtgattctccatSaaRtt
+aaYctacacNtaRtaactggatgaccYtacactttaattaattgattYgttcagDtNKtt
+agDttaaaaaaaBtttaaNaYWKMBaaaacVcBMtatWtgBatatgaacVtattMtYatM
+NYDKNcKgDttDaVtaaaatgggatttctgtaaatWtctcWgtVVagtcgRgacttcccc
+taDcacagcRcagagtgtWSatgtacatgttaaSttgtaaHcgatgggMagtgaacttat
+RtttaVcaccaWaMgtactaatSSaHtcMgaaYtatcgaaggYgggcgtgaNDtgttMNg
+aNDMtaattcgVttttaacatgVatgtWVMatatcaKgaaattcaBcctccWcttgaaWH
+tWgHtcgNWgaRgctcBgSgaattgcaaHtgattgtgNagtDttHHgBttaaWcaaWagc
+aSaHHtaaaVctRaaMagtaDaatHtDMtcVaWMtagSagcttHSattaacaaagtRacM
+tRtctgttagcMtcaBatVKtKtKacgagaSNatSactgtatatcBctgagVtYactgta
+aattaaaggcYgDHgtaacatSRDatMMccHatKgttaacgactKtgKagtcttcaaHRV
+tccttKgtSataatttacaactggatDNgaacttcaRtVaagDcaWatcBctctHYatHa
+DaaatttagYatSatccaWtttagaaatVaacBatHcatcgtacaatatcgcNYRcaata
+YaRaYtgattVttgaatgaVaactcRcaNStgtgtattMtgaggtNttBaDRcgaaaagc
+tNgBcWaWgtSaDcVtgVaatMKBtttcgtttctaaHctaaagYactgMtatBDtcStga
+ccgtSDattYaataHctgggaYYttcggttaWaatctggtRagWMaDagtaacBccacta
+cgHWMKaatgatWatcctgHcaBaSctVtcMtgtDttacctaVgatYcWaDRaaaaRtag
+atcgaMagtggaRaWctctgMgcWttaagKBRtaaDaaWtctgtaagYMttactaHtaat
+cttcataacggcacBtSgcgttNHtgtHccatgttttaaagtatcgaKtMttVcataYBB
+aKtaMVaVgtattNDSataHcagtWMtaggtaSaaKgttgBtVtttgttatcatKcgHac
+acRtctHatNVagSBgatgHtgaRaSgttRcctaacaaattDNttgacctaaYtBgaaaa
+tagttattactcttttgatgtNNtVtgtatMgtcttRttcatttgatgacacttcHSaaa
+ccaWWDtWagtaRDDVNacVaRatgttBccttaatHtgtaaacStcVNtcacaSRttcYa
+gacagaMMttttgMcNttBcgWBtactgVtaRttctccaaYHBtaaagaBattaYacgat
+ttacatctgtaaMKaRYtttttactaaVatWgctBtttDVttctggcDaHaggDaagtcg
+aWcaagtagtWttHtgKtVataStccaMcWcaagataagatcactctHatgtcYgaKcat
+cagatactaagNSStHcctRRNtattgtccttagttagMVgtatagactaactctVcaat
+MctgtttgtgttgccttatWgtaBVtttctggMcaaKgDWtcgtaaYStgSactatttHg
+atctgKagtagBtVacRaagRtMctatgggcaaaKaaaatacttcHctaRtgtDcttDat
+taggaaatttcYHaRaaBttaatggcacKtgctHVcaDcaaaVDaaaVcgMttgtNagcg
+taDWgtcgttaatDgKgagcSatatcSHtagtagttggtgtHaWtaHKtatagctgtVga
+ttaBVaatgaataagtaatVatSttaHctttKtttgtagttaccttaatcgtagtcctgB
+cgactatttVcMacHaaaggaatgDatggKtaHtgStatattaaSagctWcctccRtata
+BaDYcgttgcNaagaggatRaaaYtaWgNtSMcaatttactaacatttaaWttHtatBat
+tgtcgacaatNgattgcNgtMaaaKaBDattHacttggtRtttaYaacgVactBtaBaKt
+gBttatgVttgtVttcaatcWcNctDBaaBgaDHacBttattNtgtDtatttVSaaacag
+gatgcRatSgtaSaNtgBatagttcHBgcBBaaattaHgtDattatDaKaatBaaYaaMa
+ataaataKtttYtagtBgMatNcatgtttgaNagtgttgtgKaNaSagtttgaSMaYBca
+aaacDStagttVacaaaaactaaWttBaagtctgtgcgtMgtaattctcctacctcaNtt
+taaccaaaaVtBcacataacaccccBcWMtatVtggaatgaWtcaaWaaaaaaaaWtDta
+atatRcctDWtcctaccMtVVatKttaWaaKaaatataaagScHBagaggBaSMtaWaVt
+atattactSaaaKNaactatNatccttgaYctattcaaaVgatttYHcRagattttaSat
+aggttattcVtaaagaKgtattattKtRttNcggcRgtgtgtWYtaacHgKatKgatYta
+cYagDtWcHBDctctgRaYKaYagcactKcacSaRtBttttBHKcMtNtcBatttatttt
+tgSatVgaaagaWtcDtagDatatgMacaacRgatatatgtttgtKtNRaatatNatgYc
+aHtgHataacKtgagtagtaacYttaNccaaatHcacaacaVDtagtaYtccagcattNt
+acKtBtactaaagaBatVtKaaHBctgStgtBgtatgaSNtgDataaccctgtagcaBgt
+gatcttaDataStgaMaccaSBBgWagtacKcgattgaDgNNaaaacacagtSatBacKD
+gcgtataBKcatacactaSaatYtYcDaactHttcatRtttaatcaattataRtttgtaa
+gMcgNttcatcBtYBagtNWNMtSHcattcRctttttRWgaKacKttgggagBcgttcgc
+MaWHtaatactgtctctatttataVgtttaBScttttaBMaNaatMacactYtBMggtHa
+cMagtaRtctgcatttaHtcaaaatttgagKtgNtactBacaHtcgtatttctMaSRagc
+agttaatgtNtaaattgagagWcKtaNttagVtacgatttgaatttcgRtgtWcVatcgt
+taaDVctgtttBWgaccagaaagtcSgtVtatagaBccttttcctaaattgHtatcggRa
+ttttcaaggcYSKaagWaWtRactaaaacccBatMtttBaatYtaagaactSttcgaaSc
+aatagtattgaccaagtgttttctaacatgtttNVaatcaaagagaaaNattaaRtttta
+VaaaccgcaggNMtatattVctcaagaggaacgBgtttaacaagttcKcYaatatactaa
+ccBaaaSggttcNtattctagttRtBacgScVctcaatttaatYtaaaaaaatgSaatga
+tagaMBRatgRcMcgttgaWHtcaVYgaatYtaatctttYttatRaWtctgBtDcgatNa
+tcKaBaDgatgtaNatWKctccgatattaacattNaaacDatgBgttctgtDtaaaMggt
+gaBaSHataacgccSctaBtttaRBtcNHcDatcDcctagagtcRtaBgWttDRVHagat
+tYatgtatcWtaHtttYcattWtaaagtctNgtStggRNcgcggagSSaaagaaaatYcH
+DtcgctttaatgYcKBVSgtattRaYBaDaaatBgtatgaHtaaRaRgcaSWNtagatHa
+acttNctBtcaccatctMcatattccaSatttgcgaDagDgtatYtaaaVDtaagtttWV
+aagtagYatRttaagDcNgacKBcScagHtattatcDaDactaaaaaYgHttBcgaDttg
+gataaaKSRcBMaBcgaBSttcWtgNBatRaccgattcatttataacggHVtaattcaca
+agagVttaaRaatVVRKcgWtVgacctgDgYaaHaWtctttcacMagggatVgactagMa
+aataKaaNWagKatagNaaWtaaaatttgaattttatttgctaaVgaHatBatcaaBWcB
+gttcMatcgBaaNgttcgSNaggSaRtttgHtRtattaNttcDcatSaVttttcgaaaaa
+ttgHatctaRaggSaNatMDaaatDcacgattttagaHgHaWtYgattaatHNSttatMS
+gggNtcKtYatRggtttgtMWVtttaYtagcagBagHaYagttatatggtBacYcattaR
+SataBatMtttaaatctHcaaaSaaaagttNSaaWcWRccRtKaagtBWtcaaattSttM
+tattggaaaccttaacgttBtWatttatatWcDaatagattcctScacctaagggRaaYt
+aNaatgVtBcttaaBaacaMVaaattatStYgRcctgtactatcMcVKatttcgSgatRH
+MaaaHtagtaaHtVgcaaataatatcgKKtgccaatBNgaaWcVttgagttaKatagttc
+aggKDatDtattgaKaVcaKtaataDataataHSaHcattagttaatRVYcNaHtaRcaa
+ggtNHcgtcaaccaBaaagYtHWaaaRcKgaYaaDttgcWYtataRgaatatgtYtgcKt
+aNttWacatYHctRaDtYtattcBttttatcSataYaYgttWaRagcacHMgtttHtYtt
+YaatcggtatStttcgtRSattaaDaKMaatatactaNBaWgctacacYtgaYVgtgHta
+aaRaaRgHtagtWattataaaSDaaWtgMattatcgaaaagtaYRSaWtSgNtBgagcRY
+aMDtactaacttaWgtatctagacaagNtattHggataatYttYatcataDcgHgttBtt
+ctttVttgccgaaWtaaaacgKgtatctaaaaaNtccDtaDatBMaMggaatNKtatBaa
+atVtccRaHtaSacataHattgtttKVYattcataVaattWtcgtgMttcttKtgtctaa
+cVtatctatatBRataactcgKatStatattcatHHRttKtccaacgtgggtgRgtgaMt
+attattggctatcgtgacMtRcBDtcttgtactaatRHttttaagatcgVMDStattatY
+BtttDttgtBtNttgRcMtYtgBacHaWaBaatDKctaagtgaaactaatgRaaKgatcc
+aagNaaaatattaggWNtaagtatacttttKcgtcggSYtcttgRctataYcttatataa
+agtatattaatttataVaacacaDHatctatttttKYVatHRactttaBHccaWagtact
+BtcacgaVgcgttRtttttttSVgtSagtBaaattctgaHgactcttgMcattttagVta
+agaattHctHtcaDaaNtaacRggWatagttcgtSttgaDatcNgNagctagDgatcNtt
+KgttgtaDtctttRaaYStRatDtgMggactSttaDtagSaVtBDttgtDgccatcacaM
+attaaaMtNacaVcgSWcVaaDatcaHaatgaattaMtatccVtctBtaattgtWattat
+BRcWcaatgNNtactWYtDaKttaaatcactcagtRaaRgatggtKgcgccaaHgaggat
+StattYcaNMtcaBttacttatgagDaNtaMgaaWtgtttcttctaHtMNgttatctaWW
+atMtBtaaatagDVatgtBYtatcggcttaagacMRtaHScgatatYgRDtcattatSDa
+HggaaataNgaWSRRaaaBaatagBattaDctttgHWNttacaataaaaaaatacggttt
+gHgVtaHtWMttNtBtctagtMcgKMgHgYtataHaNagWtcaacYattaataYRgtaWK
+gaBctataaccgatttaHaNBRaRaMtccggtNgacMtctcatttgcaattcWgMactta
+caaDaaNtactWatVtttagccttMaatcagVaagtctVaaDaBtattaattaYtNaYtg
+gattaKtaKctYaMtattYgatattataatKtVgDcttatatNBtcgttgtStttttMag
+aggttaHYSttcKgtcKtDNtataagttataagSgttatDtRttattgttttSNggRtca
+aKMNatgaatattgtBWtaMacctgggYgaSgaagYataagattacgagaatBtggtRcV
+HtgYggaDgaYaKagWagctatagacgaaHgtWaNgacttHRatVaWacKYtgRVNgVcS
+gRWctacatcKSactctgWYtBggtataagcttNRttVtgRcaWaaatDMatYattaact
+ttcgaagRatSctgccttgcRKaccHtttSNVagtagHagBagttagaccaRtataBcca
+taatSHatRtcHagacBWatagcaMtacaRtgtgaaBatctKRtScttccaNaatcNgta
+atatWtcaMgactctBtWtaaNactHaaaaRctcgcatggctMcaaNtcagaaaaacaca
+gtggggWttRttagtaagaVctVMtcgaatcttcMaaaHcaHBttcgattatgtcaDagc
+YRtBtYcgacMgtDcagcgaNgttaataatagcagKYYtcgtaBtYctMaRtaRtDagaa
+aacacatgYaBttgattattcgaaNttBctSataaMataWRgaHtttccgtDgaYtatgg
+tDgHKgMtatttVtMtVagttaRatMattRagataaccctKctMtSttgaHagtcStcta
+tttccSagatgttccacgaggYNttHRacgattcDatatDcataaaatBBttatcgaHtN
+HaaatatDNaggctgaNcaaggagttBttMgRagVatBcRtaWgatgBtSgaKtcgHttt
+gaatcaaDaHttcSBgHcagtVaaSttDcagccgttNBtgttHagYtattctttRWaaVt
+SttcatatKaaRaaaNacaVtVctMtSDtDtRHRcgtaatgctcttaaatSacacaatcg
+HattcaWcttaaaatHaaatcNctWttaNMcMtaKctVtcctaagYgatgatcYaaaRac
+tctaRDaYagtaacgtDgaggaaatctcaaacatcaScttcKttNtaccatNtaNataca
+tttHaaDHgcaDatMWaaBttcRggctMaagctVYcacgatcaDttatYtaatcKatWat
+caatVYtNagatttgattgaYttttYgacttVtcKaRagaaaHVgDtaMatKYagagttN
+atWttaccNtYtcDWgSatgaRgtMatgKtcgacaagWtacttaagtcgKtgatccttNc
+ttatagMatHVggtagcgHctatagccctYttggtaattKNaacgaaYatatVctaataM
+aaaYtgVtcKaYtaataacagaatHcacVagatYWHttagaaSMaatWtYtgtaaagNaa
+acaVgaWtcacNWgataNttcaSagctMDaRttgNactaccgataMaaatgtttattDtc
+aagacgctDHYYatggttcaagccNctccttcMctttagacBtaaWtaWVHggaaaaNat
+ttaDtDtgctaaHHtMtatNtMtagtcatttgcaaaRatacagRHtatDNtgtDgaatVg
+tVNtcaaatYBMaaaagcaKgtgatgatMgWWMaHttttMgMagatDtataaattaacca
+actMtacataaattgRataatacgBtKtaataattRgtatDagDtcRDacctatRcagag
+cSHatNtcaScNtttggacNtaaggaccgtgKNttgttNcttgaaRgYgRtNtcagttBc
+ttttcHtKtgcttYaaNgYagtaaatgaatggWaMattBHtatctatSgtcYtgcHtaat
+tHgaaMtHcagaaSatggtatgccaHBtYtcNattWtgtNgctttaggtttgtWatNtgH
+tgcDttactttttttgcNtactKtWRaVcttcatagtgSNKaNccgaataaBttataata
+YtSagctttaaatSttggctaaKSaatRccgWHgagDttaaatcatgagMtcgagtVtaD
+ggaBtatttgDacataaacgtagYRagBWtgDStKDgatgaagttcattatttaKWcata
+aatWRgatataRgttRacaaNKttNtKagaaYaStaactScattattaacgatttaaatg
+DtaattagatHgaYataaactatggggatVHtgccgtNgatNYcaStRtagaccacWcaM
+tatRagHgVactYtWHtcttcatgatWgagaKggagtatgaWtDtVtNaNtcgYYgtaaa
+ctttaDtBactagtaDctatagtaatatttatatataacgHaaaRagKattSagttYtSt
+>THREE Homo sapiens frequency
+agagagacgatgaaaattaatcgtcaatacgctggcgaacactgagggggacccaatgct
+cttctcggtctaaaaaggaatgtgtcagaaattggtcagttcaaaagtagaccggatctt
+tgcggagaacaattcacggaacgtagcgttgggaaatatcctttctaccacacatcggat
+tttcgccctctcccattatttattgtgttctcacatagaattattgtttagacatccctc
+gttgtatggagagttgcccgagcgtaaaggcataatccatataccgccgggtgagtgacc
+tgaaattgtttttagttgggatttcgctatggattagcttacacgaagagattctaatgg
+tactataggataattataatgctgcgtggcgcagtacaccgttacaaacgtcgttcgcat
+atgtggctaacacggtgaaaatacctacatcgtatttgcaatttcggtcgtttcatagag
+cgcattgaattactcaaaaattatatatgttgattatttgattagactgcgtggaaagaa
+ggggtactcaagccatttgtaaaagctgcatctcgcttaagtttgagagcttacattagt
+ctatttcagtcttctaggaaatgtctgtgtgagtggttgtcgtccataggtcactggcat
+atgcgattcatgacatgctaaactaagaaagtagattactattaccggcatgcctaatgc
+gattgcactgctatgaaggtgcggacgtcgcgcccatgtagccctgataataccaatact
+tacatttggtcagcaattctgacattatacctagcacccataaatttactcagacttgag
+gacaggctcttggagtcgatcttctgtttgtatgcatgtgatcatatagatgaataagcg
+atgcgactagttagggcatagtatagatctgtgtatacagttcagctgaacgtccgcgag
+tggaagtacagctgagatctatcctaaaatgcaaccatatcgttcacacatgatatgaac
+ccagggggaaacattgagttcagttaaattggcagcgaatcccccaagaagaaggcggag
+tgacgttgaacgggcttatggtttttcagtacttcctccgtataagttgagcgaaatgta
+aacagaataatcgttgtgttaacaacattaaaatcgcggaatatgatgagaatacacagt
+gtgagcatttcacttgtaaaatatctttggtagaacttactttgctttaaatatgttaaa
+ccgatctaataatctacaaaacggtagattttgcctagcacattgcgtccttctctattc
+agatagaggcaatactcagaaggttttatccaaagcactgtgttgactaacctaagtttt
+agtctaataatcatgattgattataggtgccgtggactacatgactcgtccacaaataat
+acttagcagatcagcaattggccaagcacccgacttttatttaatggttgtgcaatagtc
+cagattcgtattcgggactctttcaaataatagtttcctggcatctaagtaagaaaagct
+cataaggaagcgatattatgacacgctcttccgccgctgttttgaaacttgagtattgct
+cgtccgaaattgagggtcacttcaaaatttactgagaagacgaagatcgactaaagttaa
+aatgctagtccacagttggtcaagttgaattcatccacgagttatatagctattttaatt
+tatagtcgagtgtacaaaaaacatccacaataagatttatcttagaataacaacccccgt
+atcatcgaaatcctccgttatggcctgactcctcgagcttatagcatttgtgctggcgct
+cttgccaggaacttgctcgcgaggtggtgacgagtgagatgatcagtttcattatgatga
+tacgattttatcgcgactagttaatcatcatagcaagtaaaatttgaattatgtcattat
+catgctccattaacaggttatttaattgatactgacgaaattttttcacaatgggttttc
+tagaatttaatatcagtaattgaagccttcataggggtcctactagtatcctacacgacg
+caggtccgcagtatcctggagggacgtgttactgattaaaagggtcaaaggaatgaaggc
+tcacaatgttacctgcttcaccatagtgagccgatgagttttacattagtactaaatccc
+aaatcatactttacgatgaggcttgctagcgctaaagagaatacatacaccaccacatag
+aattgttagcgatgatatcaaatagactcctggaagtgtcagggggaaactgttcaatat
+ttcgtccacaggactgaccaggcatggaaaagactgacgttggaaactataccatctcac
+gcccgacgcttcactaattgatgatccaaaaaatatagcccggattcctgattagcaaag
+ggttcacagagaaagatattatcgacgtatatcccaaaaaacagacgtaatgtgcatctt
+cgaatcgggatgaatacttgtatcataaaaatgtgacctctagtatacaggttaatgtta
+gtgatacacaatactcgtgggccatgggttctcaaataaaatgtaatattgcgtcgatca
+ctcacccacgtatttggtctaattatgttttatttagtgacaatccaatagataaccggt
+cctattaagggctatatttttagcgaccacgcgtttaaacaaaggattgtatgtagatgg
+taccagtttaattgccagtgggcaatcctaagcaaaatgagattctatcctaaagtttgg
+gcttgatataagatttcggatgtatgggttttataatcgttggagagctcaatcatgagc
+taatacatggatttcgctacctcaccgagagaccttgcatgaagaattctaaccaaaagt
+ttaataggccggattggattgagttaattaagaccttgttcagtcatagtaaaaaccctt
+aaattttaccgattgacaaagtgagcagtcgcaataccctatgcgaaacgcctcgatagt
+gactaggtatacaaggtttttgagttcctttgaaatagttaactaatttaaaattaatta
+acgacatggaaatcacagaacctaatgctttgtaggagttatttatgctgtttactgcct
+ctacaaccctaataaagcagtcctaagaatgaaacgcatcttttagttcagaaagtggta
+tccagggtggtcaatttaataaattcaacatcgggtctcaggatattcggtcatataatt
+tattaagggctcttcgagtcttactctgagtgaaattggaaacagtcatccttttcgttg
+tgaggcatcttacaccgctatcgatatacaatgcattccaccgcggtgtcccgtacacaa
+ggaaacttgttaccttggggatataagaaaactcacacgtctcattattaaactgagtac
+aatttttgcacgagaaagtaatgcaatacaatatgatgaaagccagctaatgaaaaggga
+tggaacgcacctcggatctgttgcactggattaaaatccgattatttttaaaaatattca
+gtgctagagcatatcaggtctacttttttatctggtatgtaaagcccacggagcgatagt
+gagatccttacgactcaacgaaaagttataacataactcccgttagccaaagcccaatcc
+cgattactgccctaccctaacgtctgccatctaaatatcgaacttgttatgatcaatgtg
+actacctcccaccctttccccttcatttgttccactggggataagctagcgttttcagaa
+tcaatgcaataagaatagccaattgtctcacttcatcagagctcttggcaattccaggcg
+ctacgtggttctggaatatattcatttttcaaatagtaatacgtttagtgttgctattgt
+ctacacgtttggatattacgttatgtgagcggacatcaatagttgtctaactctttagta
+agccagagatagcactcttagcgaatggataccatcttccataagtttagttaatagtcc
+gaaacaactgcttcgagcatatttgaacctccttgtaggcaaatagcctcttcaaagcaa
+tcttactaatagatagagtttgttttaagggactactagaaatgggacaatcttaatagt
+atgacctaaactgacatttaaagatatatccaggtggcaagcataaagatcattgcgcca
+cctccaccgtgggattacttatcagtcgatatcctatatgctaagtttgcgacggcagaa
+tacaaactaagctgagttgatgctaaccttacctatgataccccattggaccggttaaca
+gccctacttattccaaataaaagaacttttatgctgtagaagctattatagtgatgcctg
+gtaacttcagtatattaaaatgacacacatacgccatatagagctcctggaactttgaat
+aatgagcgaacttcgaagttgaagagcaagaaaccatatgtcacggttgcctaaagcccg
+gtaaccagacatgtgctatcattgatcattatcgaggttttcataaccttgacccattat
+cggctgtgcgcggacaagtacttaaatcactagtttcttcacctgcttatcggtaagaaa
+taaggttggcaaagaatcgcataagacggacgtagagccgcagcgttgtgcgagtccagg
+tgcatgcgcagcaataggattttaaattttgttccatttttaatttagccgtaaggatgt
+ccgtaaatgattgaaaattggattcaatctttgggcctatgctactggaacctgatcgac
+aaaatttcaaacatacgttaactccgaaagaccgtatttttgcggctagaatagtcagtc
+gcttggagccatataccttaccacttaaacgacgtgctcctgtagttgaaatataaacag
+aacacaaagactaccgatcatatcaactgaagatctttgtaactttgaggcgaagcaccc
+tcttcgagacaactaagagtaaagtaccgggcgccgcaaggagtcgattgggaccctaaa
+tcttgacgaattgctaagaggctcagagctaccactgtaatttctctagagcccataata
+aatgaacgatacatccgtaggtagcacctaagggattataatggaagccaaatgcagtta
+ataatattatatactggcgtacacgattcgacggatctctcacatagtgattcacgaccc
+ccccctttgattgacacagcgtcagcattttgcaagaacgatcttctgcatagggtgcgc
+caccgtaaggatgacgtcgaagctacaactgggtataatttaccatgcttccctgatgct
+gagtgcaatacactaagaatgagtttttaccccatatcaccagtatttgttctgttattg
+cgaagaaatggctatgctgagttggcgactaaagtcacccatcctttttattaggtaacc
+ccctcccttaaactaactgatttgctggagctgccctgcatacatatactttatcattta
+tggacgtccgtgacgcttattatccaccatagtcgatatgctacacggattcattaatgg
+atcgtaggagtttaagttatatttactaagatcggtctcggctactatcccgccttaccc
+ggcgctatttacggccatttttaatatattgacggtaattattcctatggtttcgaccgc
+acgtccttggacaagaaagaatggcaaaaaaaatgtaaaagaaaaaaaatattgagtccc
+taccatcatataaaaaatatgtgatgagtaacttgacgaaatgttagtggttattaaaga
+ctatctattacaccttttgttttctgtcgtagtatattaaagtctagaagccttacagga
+aaatcagggttatacagccgatactccgcagcatgaatcatcgaggaggtgtcctaccat
+cgcgccttgtaatcttgtctgtgtatactgtatttagaccttttatacaaagtaaatatc
+tcggctttatgtgattgggaggggcctactcaaacatgatgacttgacctaataatcact
+gtgcgggcgtcttatgactagctattccttgaaatccaccaccaaatggttaatatgtaa
+aaactttgacgatgaaacaaggtgaatgtgtagttactttgtgtaattagctgcgtcgag
+cattgcttgtaaaaccgtcaatcgcacacgttacttccataaaatttctacgaatacacc
+cttcttaaaaaaaacgtaggaattcacgagtttaacaaacgataactgtataaagtggaa
+gtccgaagaaagcagatgcccgaactactcgaagatgtttcgttttcttaaccatagggg
+cttcttaatggcccactacgcacattttgttcaagcccgagagggacatccccattacgg
+gagtattactaaaactgttccgtaatacgttcagcaagggatgaaaaaggccactgctca
+agttattgacgtgggagtattacatcggaagcctgaatcccacactatgatggtctgtac
+aggcctagggactgcgtctagacggtattaccggcttctaatcatacgatcgtgagtctt
+aacgggaagtaaggctcacacctaccccaaaccatttatctatgtaagtataaaattgtg
+cgtaagtgttcaaagtggacaataaagacgtggcaaaaacccccgcacataagccgcttt
+agatttcacaaataccaatgcggttaaaaacatccttgagtcgtacatacaccatactcg
+cgttaaacggatataacagaagataataaatccggatgtggagtcggtgtaactatagaa
+agccaagtgaaataatgcttaccagtcatttagctatacggctttcatttcatgtcaaga
+gggtggagtttgacctgtacagttgatatatcaccgatacttagaactcacctaaagcta
+aaattgctcgcagcgtgtaatccgcatattacaaacaatagatgggattcattatacata
+agacacgatgatctgctttttcaggttgcgagatgttgcctatcgtcaatcgagtcctgc
+cttacaccacttaaacaaaagtattgacagggaacctattttcgaggtattatatagtcc
+agcttgaatatcaatttgacagttaacctagtgaaaatcagtaagaggaaatacgccaca
+ttctccagtgaaattctacgggttatcgtctagtccaactatcaattataactcacgaga
+tataagtaaattctcgtacttggcctgatttttattatactttggatccttagtaaacag
+gaagggagaaaccttcaacgaaaaacactggattttgttttactctcaaagctcttatat
+gacggaaataccctgtcaagtcttaactttattactagactaatgaaatgggcttggggt
+ggccagaatcatagtacaatttagcggatacactattcggactttcctatcggctgtctg
+gttggataagtatggggactaataggctagacatacctatacttaaactatacaggcgtc
+atctatctctgcaactttggagttccctgatgttctcccgccctttgggttcacatcttc
+tataccgacacccctaataacgattagtttgtgggttagagtaaattaatacggttaata
+ttaatgtatcgttgaaaagctggtgtcgccaataaggtaaccggctaggcagagtatatg
+tcacgaagtataactaccctaatgataagctgtaggaataaaattaatgctgtctctaag
+cgaagagatatttccgactctgttttaatgacgaatctcattacttctgacttgcaaatg
+ttcaatatggcacggtttcacggcacctttgtgacgcatataatgaacttagaagattat
+aacgacggaactttatatgataatccgttacgattaaagaatctgttaaatatcataatg
+gcattcagttctagaccgtgcatcatggtaaacttactttctctgcatggcgacatacat
+ttcgctattcaaattcgcgtgtggttacacccactcgcacctttggaatattaagagaag
+atgatcagaaaatccattcgctcaatttttctgacgtacgtctaatttatcctaggagac
+aaatcgttttatgtctctcacatttttgaagaaaggttcgagagacaatactcaggtcct
+gaactgctagaagatactcggtggagcgtggcaacaatgaaaaactcgtgacataaatga
+atgatacttttccaagttcagttaagtgaatatgtttaacatacccggcttttcgatctt
+aagctgacgctggacgtgcgagtaatgtcagtctcttacatacactagtgactccaagtt
+tcgtcaaaaacgccccctcccttctcgagcccactcacgctatgtattgacgcgaacttg
+ttcgggatcagacttttcaggagttcggtcgcgtgtccctatgtgctaatatataagtta
+gatcgcattagatgctaatctgaatacttatagacgaccttcaacgagaacgggtaccac
+cttgaggctagagttaggtgtgaaacgacaggtagggacatataaaatttgagtgcggct
+ttagttaagggtttaattacctactcaaacatcacgctcgcgcccttcgtacgtaatcga
+ccatctagaggctaaggggactgtactaggtagtgattaatgatatcctagacgcacgtg
+ccttagatcttcagactctgatggtccgcgatcaccgtaattgtagtcctccaactcgat
+cactttgttggcgtcaaagaaattacgatatctaaatacttataatacaataaccaagga
+tgagaatgactcatcgcgttggagttatattgcttgaagttctatggaatgaaagcacgt
+tatctgccgtcccaatatctccagtgagctaattcattggacggtccactttgatcaatc
+cccgaggagatgttcggacactttagtctgtaacacttagcgttgagaccacgaacaatt
+gattactcagtcttgaaggtgttttccaaagttcattttaaataagactacgataggcct
+ttcctattgatataaactacccggctctgttgttcgtgtgagtcgtacttctctgtgttt
+ttctgattatagcaagattcgattcttagtgtaaacagcgatttttatttgacccgtcaa
+tgagaagcgcataggatctaagcaaaattatcaagttgtgccacaaggtaagatctttcc
+agttattgcaggtaggatgtatcccacgttgatagtatgaggtctgacgtcaactgtcta
+ggagagttgaccgcgtgcgggtacaccggatttgcatcgatgttgagaacgcagaactcc
+cactgtcgtggcggcgttcctgatatttagcaagaggcgttgataaagccctcatcatct
+agatctcgacctcatctgccctcttgctccatcattttctacacagactactttcctatc
+tacgttagtataattgctttctatcttagtatcatttagagcttctccgtcaacaggttc
+gtgctattaaagttagtacgaaagggacaacttgtagcaacgcatttaatcggttttcga
+ctacttcgcacaaaatcagataaagaagtttgtcattctattagacattgaattgcgcaa
+ttgacttgtaccacttatgatcgaacactgaatcaagactgtgattaactaaaatagaca
+agccactatatcaactaataaaaacgcccctggtggtcgaacatagttgactacaggata
+attaattggactggagccattacattctctacaatcgtatcacttcccaagtagacaact
+ttgaccttgtagtttcatgtacaaaaaaatgctttcgcaggagcacattggtagttcaat
+agtttcatgggaacctcttgagccgtcttctgtgggtgtgttcggatagtaggtactgat
+aaagtcgtgtcgctttcgatgagagggaattcaccggaaaacaccttggttaacaggata
+gtctatgtaaacttcgagacatgtttaagagttaccagcttaatccacggtgctctacta
+gtatcatcagctgtcttgcctcgcctagaaatatgcattctatcgttatcctatcaacgg
+ttgccgtactgagcagccttattgtggaagagtaatatataaatgtagtcttgtctttac
+gaagcagacgtaagtaataatgacttggaataccaaaactaaacatagtggattatcata
+ctcaagaactctccagataaataacagtttttacgatacgtcaccaatgagcttaaagat
+taggatcctcaaaactgatacaaacgctaattcatttgttattggatccagtatcagtta
+aactgaatggagtgaagattgtagaatgttgttctggcctcgcatggggtctaggtgata
+tacaatttctcatacttacacggtagtggaaatctgattctagcttcgtagctgactata
+ctcaaggaaccactgctcaaggtaggagactagttccgaccctacagtcaaagtggccga
+agcttaaactatagactagttgttaaatgctgatttcaagatatcatctatatacagttt
+ggacaattatgtgtgcgaaactaaaattcatgctattcagatggatttcacttatgcctt
+agaaacagatattgcccgagctcaatcaacagttttagccggaaacaatcgaagcatagg
+gacaatgtatcttttcctaaattgccatgtgcagatttctgagtgtcacgaagcgcataa
+tagaatcttgtgttgcctcaactcgttgaaaagtttaaaacaatcgcagcagtctttttg
+gggtctactgtgtgtttgcaaaataactgaaagaaacgcttgaacaactctgaagtagct
+cgagtactcattaaagtgtaacacattagtgaatatcggccaatgaaccaaacgcttccc
+ggtacgctatctctctcatcgggaggcgatgtgcaggttatctacgaaagcatcccttta
+cgttgagagtgtcgatgcatgaacctcattgtaacaatagcccagcaaattctcatacgt
+gcctcagggtccgggcgtactcctccatggaagggcgcgcatctagtgttataccaactc
+gctttttaactactatgctgtagttctacaggcatagtggccagtattttctaacttctc
+tggatagatgctctcactcctcatccatcacggcttcagtttacgtcttacttgcttgtt
+cagcaacggatggaggcattaagtatcttcactgttccctaaaattgctgttcaatatca
+aagtaaggacgatacagggaaagctcaagcacactcattgaatactgccccagttgcaac
+ctcacttaatctgacaaaaataatgactactctaagtgttgcggaagcagtctcttccac
+gagcttgtctgtatcacttcgtataggcatgtaactcgatagacacgaacaccgagtgag
+aaactatattcttgcttccgtgtgtgtgacaccaggtaattgatgcggatataagctgga
+gatcactcacgcccacacaaggcgctgctacctctttattccaatgtgtaagaatttgct
+aacttcatttctagaccgcagctttgcggtcataatttcacggtacggacccttgggtta
+gagacttgataacacacttcgcagtttccaccgcgcacatgttttagtggcttctaacat
+agaatttttgttgtgacataaagagtgcgtgggagacttgcccgaccgttaagccataat
+caattgaaagccccgtgagtcacatctaattggttgtactgcgcatttagctatccttta
+gctgactcgaagagattcgattcctaatataggttaattagatggctgccgcgcgaagta
+aaacgtgaaaaacgtagtgcgcagatctgcataactcgcgcttaattacttatgagtagt
+tccaagttcgctacgttatgagagagattggaattaagcaaatatgttttatggtgattt
+tgggatgagaaggactgctaagtacggctactaaacaaatttctaaaaccgccatctacc
+ttatcttggagacatttaagttgtatatgtcactagtctagcttttgtctgtgggacgcg
+ttctcggaatgagggaaatgcaagagccgattcatcaaatgcttatctaagaaagtagtg
+gactattacaccaagcacgaatgccagggaactgctttcttgctcaggacctcgcgacaa
+ggtaccccgcataagtcctagaattacatttggtcagcaatgctgacatttgaccgtgaa
+aacataattttaatcagaaggcagctcacccgcttgctctagatcttatctttgtatgaa
+tgtcagaatttactgcaatatccgttccgaatagtgagggcttagtatagttctctgtat
+acaggtcacatcaaactccccctgtcctagtacagctctgagctttaattaattgcatac
+atttccttcaatcatcagatgaaaacaccgcgaatcatgctcttctcgtatagggcaaga
+gaagcaacaaacaactagcccgactcacgttcatccgccgtatccttgttcagttcttac
+tccgtattaggtcagcgaaatctaatcagaataatcggtcgcgtatcaaaattaaaatcc
+cgcttgaggttgacaattaaaacgctgagcagttatcggctattagatagtggggtgaaa
+gtaattggctggaattatgttaaaacgtgatattaagctaaaatacgctacttgttgccg
+acctaattcagtcattcgatattcagttagagccaagaataacaagcttgtataaattga
+acggggtgcactaaacgatgtgttactctaatattcagcttggagtatacctgaaggcga
+attcatgtatcggccaataataagacgttgaagatcacaatttggactagcaaaagaagg
+tgatttatgcgtggggattgagtccactgtacgagtacggtctctggaaaattataggtt
+cagggaatataaggaagtaaagataattaccaagagatttttggtatcgctatgacccag
+aggtgttctaacgtctgttttgatccgcagaatttctgcctcaatgcatatttgacggac
+ttgaactagagcctctaaagttaaatggcgacgcaactgttcctaaacttcaattattac
+tactctttttttcctagggtattgtagaggccagtggacaaaataaatcaaatttaagat
+gtttcggacattaacatcccccgtagcatagaaatcatcagttatccaatctctcatcga
+gcttttacaatttctgctggcgctatggacagcatatgccgcgagacctccgcaagactc
+acttgatcactgtaagtatcttcattagaggttagagcctatagttaagctgctgaccta
+gtaaaattggtattttctaattttattgctcaagttaaaggttagtgaagggataatgac
+gttatttttgaacaatgggttgtattcaattttatatcacgaatggaacccttcattccc
+ggcataatactagacgacacgaacaagctccgatctatcagccaggcacgtgttaaggtt
+taattccggcaaaccaatgaagcatcaaaaggtgacctgatgcaacttagggtcacgatg
+agtttttcaggactacttattacctattaataagttaacatgagccttcataccccgtaa
+gacaatacatactccaccaattagaattctgagccatcttatctttttgtatcatcgaag
+ggtatggccgaataggttaattagttactcctaacgtctctacaggcatgcatttgacgc
+accttcgaaaatagtcaatctctcgccacacgcgtctagtatgcagcatcaaaaatatag
+tccacggtttccggattaccaaacgcggcaaagagaaacattgtatcgacggagataact
+taatacagaaggaaggggcatcttcgaatacggatgaataattctatctgtttattctga
+catcttgttttcaggttaatcttacgcattcaaatgacgcctgccccatgcgtgcgcaat
+tattttctaatattgacgagagcaatctcactccttttgggtctatttatgttttattga
+ggcacaagcctatacagaacaggtactattaaggccgtgagtgtgagactcaaaccgtgg
+aaacaaaggatgggttgttcttggtacaagttttagtgcatgtgggcaatccttaccaaa
+atcagatgctatccttaactttgggctgcatttaagatggcggttggaggcctgtgagaa
+tcctgcgtgtcatctttaatgaccgaattcatccatgtagattcagatcacacactcatt
+ccttgatgttgtctaaacaaaagttgttgtggacgcattggagggagttaagtaacaact
+tgggatcgcatacttataaaaattatatgttaaactttcacaaacgctgaagtccaaagt
+aactagcccaaacgcctcgagagtcactaggtattaatggtgtttgagttcctgtgaaat
+agtgttcgaaggtaaaatttatgtaccaaatcgaaagaacacttaataaggcttgcttgc
+acggaggtatgatgtttactgactctacaaccctaattttccagtacgtacattcattcc
+aataggttagttctcaaagtgctatacaggctcctcaattgatgatatgcttcagccgct
+ctatggatattagctcattttatttaggaagcccgcttagaggcttactatgagggaaat
+gccaaaatgtcatacttttcggtgtgtcccatatgacaccgctttacatagaatttgaat
+taaaacgcgctctcccgttcactaccatacttggtaccgtgcgcatattacatatagata
+taggatcattttttaaagctgtactaggtttgatcgacaatcttatgctatactatatga
+tgtaaccctcataatcaataccgatcgtacgatcctagcataggtggcaagcgattttat
+gccgattattgtgttaaatagtctgtgagtgtgattatcagggctacgttggtagagggg
+ttgtatagacctcgcacacattgtgacatacttaacaatatacgaaaactgatataataa
+atccccttacccaaacaccaatcccgttgaatcaactaccataacgtctcccatataaat
+tgcctacttgtttgcataaatctgaatacataacaccattgcaccttcttgtgttccaat
+cccgttaagattgccttgtcagatgatatgcaagaacaatagcatttgctagcaattatt
+aacagctcttcgaattgcctccacataacgcgggagggtatattttaatttggcaaatac
+taagtactgttggcgtcatatgctattaacggttggatattaagttatgtcagccgtaag
+caagagtgggcgaaatattttgttacccagtgagagcactcttagagtttggatacaata
+ggccatatgttgacttaagaggacgtaactacgccgtacaccattgttcaaccgacttct
+tggcaaatagaatcgtattagcaatcttaagaatagagacacgttcgtgttagggtatac
+tacaaatccgaaaatcttaagaggatcacctaaactgaaatttatacatatttcaacgtg
+gatagatttaacataattcagccacctccaacctgggagtaattttcagtagatttacta
+gatgattagtggcccaacgcacttgactatataagatctggggatcctaacctgacctat
+gagacaaaattggaaacgttaacagcccttatgtgtacaaagaaaagtaagttgttgctg
+ttcaacagatgatagtcatgacgcgtaacttcactatagtaaattgaaacaaatacgcaa
+tttagacagaatggtacggtcatgaatgacagtaattcgaagtgctagaccaacttaaaa
+taggtaaacgtgcccgaaaccccccttaacagaaagctgctatcatggtgcagtatcgac
+gtgttcagaaacttgtaacttttgagcaggtccgagcacatggaagtatatcacgtgttt
+ctgaaccggcttatccctaagatatatccgtcgcaaactttcgatttagtcccacgtaga
+gcccaagcgttgtgcgactccacgtgcatgcccagaaatacgagtttaaatttggttaca
+tggttaattttgaccgaagcatcgcactttatgattgataattggattcaatatgtcgcc
+ctatgcgaatgcaacatgatccacaatttggctataagacgtttaatccgtatcacactt
+tgtttgcggctagtatagtaacgcccgtgcaccaagagtcagtaacaattataagtactc
+cgcaggtacttcaaatataaaaactaatcaaacacgacccatatgatcatctgaagatat
+ttggaactttctcgacaaccaccctcgtactcaatacttacactaatcgacaggcacacg
+caacgtgtacagtcgcaccatattgagtcaagatttgcttagtggcgatgagcgtacacg
+cttatttctctagtcacaattagttatctacgagacatcacgagggagcaaataagcgat
+gttatggctacacataggcacgtatgaatatgatataagccagttaaacagtcgaaccat
+cgagcaaattctcatgcaccaacccacacgttgaggcacaaagagtaagctgtttgaatg
+taacttcttctgctgagcgggccccaacgtaaggatcaactagaagagaaaactcggtat
+tagtttaaatgcgtcacggagcatgagtgcatttcactaagaatgtctgtgtaaccaata
+taacatctatttgttatctgattgcctacttatggctttgcggtcgtggcgactaatgtc
+tccaatccttttgaggtcggtaccaactccctttaaattacgctgtgcaggctcatgcac
+tgcatacatatacggtagcaggtagggacctcacgcacccttattataatcaatagtagt
+tatcagtcaacgaggcaggaatgctgaggtcgaggtgttggtatattttctatgtgccgt
+ctaggcgactatcacgcattaccaggcgagatttaagccaattttgaatatagtcaacgt
+aatttttactatgggttccaccgaaacgccttgcacaactaagaatcccataaaatatcg
+atatcaaataaaagattgtgtcaataccttcatatatattttttcggttgactaacgtga
+actaaggttaggggttttgtatgtctatataggaaacagtttcttttctgtcctacttta
+gtaaagtcttcaagccttactccaaaatcacggtgattaagccgttactcagcagcatga
+ttctgcctgctcgggtcctaaaatccagccttgtaagagtcgctgtgtattagctaggga
+gacctttgttaaaaaggatatatcgcggcgggatgtgagtgcgtggcgcatactcaatct
+tcagctcgtgtcattataatatctctcccccacgcttttcactagatatgccgtgtaagc
+aaacaccttatgcttaatttcgaaaatattggtacttgaaaaaagctgtaggggtactta
+atgtctggtaggagatcaggagagaattgagtgtaaaaccgtaaagccctcacctgactt
+catgtaaatggcttagaagactccatgatttaataaatactacgaaggaaagactggatc
+taaagataactctagtaaggccaactcccttcaatgctgttgccagttataatccaagag
+ctgtccttttctgaaccatagcggcttctgaagcgaactagaagcaaagttggttctagc
+cagacagccacataccctgtacgggtgtattactaaaactggtccggtattagttcacca
+agggaggaattaggcaaaggatctaggtatgcaagtcggagtattacatccctaccctga
+atccatcaataggttcctctgtactggccttcgcaatgagtattcaaggttgtacagccg
+tataataataagatagtgactatgaacgggaagtaacccgctcaccttccccaaaacatt
+gttatatctaagtattaaagtctgccgtagtgttaatactcgaaaataaacaactggcaa
+attacaccgcacttaagccgcttttgatttatatttttccaatgcgcttttaaaaataat
+tcagtcctacatactaattaagacccttaaacggagatatcacaagttaagttttaacca
+tctcgactaggtggaactatagatacccaactcaatttatcattacctgtaatgttccta
+gaaggattgcatttcatgtcaagacggtggagtttcacagcgaaacttcagtgtgaacag
+attctgagaaatcacctaaacctattagtcagagcacccggttagaaccagttgtcaaaa
+aatagagcggttgcatgagacagaagtaacgatgagatccgttgtaacgttgagacatct
+ggcctatcgtcaatacagtcctcccttaaaaatatttttaaatactaggcaaacccaaca
+taggttagtcctatgtgatacgccacatggtatatcattttgtaacgttacctagggata
+atcaggaagtggaattacgcaaaagtagacagtgaaatgcttagggttatagtctagtcc
+aaagataaaggataaagcacgtcagagaactatattagccgaatgggaatcattgttagg
+agactgtggatcatgtctaaaaagcaacgcagaaacagtcatcgaaaaaatctcgttttt
+gtttgaatctaaaagagctttgatgaccgatagtacctgtatactagttactgtattacg
+tgtctaatgatttcggattggggtccccagaatcagacgtcattgtagacgattcaagtt
+taccaatttaatttcccagctctccttggagaactatcgccaataattgcagtcactttc
+cttttctgaaacgataaagccgtcagagttctctgcaacgttggacttacctgaggttct
+aacccactttcggttctaatagtagttaacgacacaacgaataacctttactgtggggct
+ttcacgatattttttcgcttattattaatggttacgtcataagctggtgtccaaattaag
+gttaccggcttcgcagagtagttgtatccaagtataacttccctaatcataagatcgagg
+tagaaaattaatgctgtctctaaccgaacagatatgtcccactatgtggtatggacgttg
+ctaattacttctgaagggaaattggtcattatggatacgtgtctaccatcaggtcggacg
+cagatatggttctgtcttcagttgatccaccgttctttataggataataactgacgatta
+aagattatggtaaatagattaagccaattctcttcttgtcagtgaagcatccttaactga
+cttgctctgcagcccctcatacatttagctattcaaagtaccggctcgtttcaaactctc
+ccacctttggaagaggttgtcaacttgataagtatatcatttacagcattttttcggacg
+tacctctaatgtttcattgcagaaaattagttttttctatcgcacattttgcaagtaacg
+ttagagacacaattatctgcgaatgaactgctagatctgacgaccgggagcctcgcaaat
+atcaaaaaagactgacatatatcaaggagtcgttgacaagtgctggtaagtcaattggtt
+tatctgtcccggcgtttcgatcttaagctgaccatgcacggcagagtaatgtcactctcg
+ttcttacaagtctgtctccaagggtcggcaaaaaagacccctccattctcgagcccactc
+acgatatgtagggacgacaacttgtgcggcttatgaattgtctggactgcgggcgagggt
+ccatatctccgaagttagaagggacatacctttagatgataagatcaattcttattgacg
+aaattcatccacaacggggaacaacttcaccctagacttacgtctgaaaagacacctagc
+gtcttataaaaggtcagtgccccgtttcgtaaggctggaattacctacgcaaacttaaac
+ctcgcgcccttccttacgtatcgacaagatagaggctatcgcgaatgtactacggaggca
+tgaatcatatactagaaccaagtgcctgtgatattaacaagatgatccgacgcgagcacc
+gtaattctaggcataaaactccagcaatttgggggccgaaaacaaatgacgttagctaat
+taattatatgacatgatcaaaggaggtcaatcacgcatcgagttcgacgtatattcattg
+aacttcgtgcgtttgaaagaaacttttatgaaggcaaaattgatcctgtctcctatttca
+tgcgtacctcctagttgataattccccgagcagtggttaggacacttttgtcggtatcaa
+gttccggtctcaaaacgtaaaattctgtaatctgtatggatggtctgtgaattagttaat
+ttttatgaagtcgtcgagacgcagttcctattgatttattctaaacggagatgtgcttcg
+tgggactcggaagtagatctgtgtttatgattattgctactttagatgctgactgttaac
+tccgtgttgtttttcaaccgtatatcacaaccgaattggatagaacctatagtttcaagt
+tctgccacaaggtatcatatttacagttagtgctggttgcttctttcaaacgtggtgagt
+ttgtgctatcacgtcaacggtagagctcagtggaccgagtgcgcgttcaaccctgttcca
+gagagggtgtgatagcacatataccacgctcgtcgaggcgttcatgatagtttgcaagag
+ccggtgttaaacacatattattattgttatccaactaatcggacctatgcataaagcatt
+gtctaaacagaataattgcctatatacggtagttttagtgatttatatcttagtatcagt
+tagagcttcgaactcttcaggttcctcatatttaacgttcttcgaaagcgaaaacttcta
+caaacgaatgtaagcggttttccaagtagtacctataaatcacagaaagatctgtctcag
+tatagttgaaatggtattcagctagtgacgtgtaccaattatcatagttcactcaagcaa
+gacgctcattaacgaatatagacaagacactatatcatataataaaaaagaacatggtgc
+tcgaacatagttgaattcaccatattgaaggggaatgctgacatgtaattcgctactaga
+cgatcaattccctacttgtcaaagttgaactggtacgttcttggaattaaatatgattgc
+gctggaccaaattgcgacttcttgagtttcagggcaaacgattgagccggaggatgtccg
+tctcttacctttcttgcttatgataaacgacggtccctgtacatcactgggaattctcag
+caaaaataattgggtaaatcgagactcgatgtattcggccacaaaggtgttagacgttaa
+agattattcaacggggcgataataggatcataaccggtatgcaagcgcattgaaagagcc
+atgagatccttatccgataaacgctgcacggtatgtgcagccttattgtcgatcacgaat
+ttataaatgtagtctgggctgtaagttgaagacctaagttataatgaagtgcaataccaa
+atcgattcatagtggattatcagactcaagatatctcctgataaattacagttgttaaga
+tacggataaaatgagatttaagattagcagcctctaatctgtttcaatcccgttggaatg
+tggtatgcgatcaaggttaagttaaaatcaagcctgtcttcagtcttgattcttgttctg
+ccatcgcatgcggtctacgtgagttaatatgtagcttacgttctagcttgtgctaatctg
+agtatagattcgtagaggaatattatcaagcttccacgcctcaacgtacgtgtattggtc
+acacaagacactaaaagtggaagtagcgtaaactatagtctagttgttaaatgctcagtt
+cttgttatattcgatatactcttggctaatttatgtctgagtatataaaattaatgatat
+taacttgcatttcacggatcccttagaaaaagattttgaccgagcgcattataaacggtt
+acaccgaatcaatagaagcatacccaatagctttctttgaatttattgcctgcgcaactt
+ggctgactctctagatccgaataattctatatggtcgtgacgaaactagttcattactgt
+ttaaaatgccaacatgtcttttgggccgataatggctctttgcaaaattactcaatgata
+cgattgatcaaagcggtagttgctagtggtagcatgtaagtctatcaaatgtctgattat
+ccgaaaatcttccaaaagagtccacgtaccatatctatctcatagcgacgcgaggggaac
+cttatctaactatcattccatttaccgggtgactctcgatgcaggatccgattgggataa
+attgcccagaaatggctcattcctgactaagggtaaggccgttctcagcaagggaacccc
+gcgaatctaggcttataccatctagattgttaactacttgcctgtagttctacagccata
+ctggacagttgtttctaaatgatcgggattcatgctagcactcctctgaatgcaccgcgt
+aagtttaactattacgtccgtgggcagataaggatggaggctgtatgtatcttaactgtt
+acctaatatggctggtaattatcaaagtaaggaccttaatgccatagcgctagcaatcgc
+tttgtatactgaccatgtgccaacctctcttaatctgtaaaatataatgtcttagctaac
+tgtggacgatcatgtctctgcctagagcttcgctgtatcaattcctatagccagcgtact
+agtgacacaacaacaccgtgtgagaaaagatattagtccttacgtctgtctctctacagc
+ttattgatgaggattgaacatggacatatagctccccctcaaaagcagatgctacctctt
+tattccattctcgaacatttgccgaacttaatttcgacaaacctgaggtcacgtcttaat
+ttatcggtaacgtcacgtccctttgagactggataaatatattaccaggggccaacgagc
+aattgttggaggcgcttctataatacaaggtgtcttgtcaaagaaagacggcgtgcgtct
+cgtgcaactcacttaaccaatattaatgtgaaacccccctctctcacatcttatgcggtg
+tactgccctggtacatttcctgtacaggactccaacagtgtagattcctaagatagctgt
+tggagttgcctcacgccagatcgaaaaactgaataaactagtgagctgagctgcagaaat
+accgcttaattacttatgactagttcaaagggacctacgtgatgtcagacattgcaagga
+agaaattaggtttgtgcgtcattttggctggactagcactccttacttcccctactattc
+aaatgtcgtaaacagcatgagacaggatcgtgctgacatttaaggtctattgggaacgag
+gctacctttggtcgcgcgctcgcgttctccgaatgaccgaaatgcatgagcacagtatgc
+aattgcttatagatctaaggtctggtcgttgaaaccaagcacgtaggcctgggaaatcag
+ttcttcctcagcaactacacaaaagcgtccaagcattagtacttgtagtaaatgtccgaa
+cctatgcgctcatttgaaagtcaaaaaatatttttaagcagtaggcacctaacccgattc
+ctctacttagtagctttctttgattctcagaattgactgcaatatcactgcacaattctg
+tgccattactagacttctctgtattaacgtctcatcttactaacactcgcctaggacaca
+tctgagagtgaagtatttcaatacatttactgaaatcttcagttctaaaatccccgaata
+aggctcttatcggtttggccaacacaagaaaaaaacttcttgcaccactcaccttcatac
+gcaggagcctggggaacttagtaataactatttcggcagacaaagcttataacaagttgc
+cggcgcgtataatatttaaaagaccccttgagctgctcaattaaaacgctcacctggtat
+aggctattagatagtgccgtcttagtaaggggcgggaattatcggataaactgatatttt
+gataaaataaccgacttgttcacgacataagtcactaaggagattttatctttctccaaa
+gtatatcttccttggataatttcaaagcgctgcaatttaagttctgttactagtttatgc
+tgctgggaggtgaccggaaggcgtagtaatctagaggcaaattataagaagttcatcata
+tcattttcgactacaaaaacaaggtgttgtatgccggcgcattgtgtaaactggacgagt
+accctagatggaaaattatacgttaagccaagatttcgatgtaatgataattacctacac
+atttttgctatccataggaacaagagctgttctataggctcgtggcatacgaacatttgc
+tgccgctatgaatattggaagctcttcaactacagactctattcttaattgccgtcgaaa
+atgggccgaatcggctattattaatactcggtttttccgaggggattgttgtcgacagtc
+gtaattattattaatattgatgttggtgaggtcatttaaatacaaccttgcagacaatga
+ataagggatccaatctctcatactccttttacaattgctcatgcccctatgcaaacctta
+tgccgccacacctccgcaactctctcttctgaactgtaagtagcttcattactggtttga
+gactatactgaagctgatgacattctaaaatggctattttcgaatgtgattcataatgtt
+tatcgtttgggatggcagaatcacgttatttttgatatagcccgggtattctattgtata
+gaacgtatgctacaagtcattccccgaagaagactagaagtaaacaacatgcgaccatcg
+ttaagccacgcaaggctgtagctttatttcccgataacctatcttccataaatagcggac
+agcaggatactgacgctcaacatcagtggttatggtctaatttttaacttttaataaggt
+aacttcagcaggcatacacagtaactctttaatttataatcaaattagaagtctgacact
+tcttatatttttctatcatccaacgcgatcgcccattagcttattgtgttactaataacg
+tatctaaaccaatccttttcaagctactgcctatattgtcaatatatacaaacaacagga
+tagtaggctgcttaaaaaatattgtcaaccgtgtacgctttacaatacccggaaatcaca
+aactttgtagacaacgagtgaaatttatacactacgaagggccagcgtacaagacccatg
+aattaggcgatatgtttattctgacatattggtttatccttaatctgtcgctgtaaaatg
+aagccgcccccatccctgcgaattttttttcgaagattcacgactgaaatataaatacgt
+ttggctatatttatgttggagggaggcaatagcctttactgttaaccgaagatttagcca
+gtgagtgtgacactaaaacactggaataaatgcaggcgttcttctgggtaaaaggtttag
+tcaatctcgcctataagttcatatagctctggatataattatctggcccatgcatttatc
+atggcgcttggtgccctgtgtgaagccggcctctcatattgaaggtccgaagtattccat
+gtacattaagatcactctctcattcatgcatcttggcttaacaaatctggttgtccaagc
+tttccaggcacgtatggtacaaattcggatcgaatacttataaaaatgatatgttaaact
+gtctaaaacgctcatctacaaagtaaagtgcactaaccaatagagtctcaagaccgtgta
+atgctggtgcactgaatgtgtaatacggttagaagggattagttatgttacaaatccatt
+gaaaacttaagaagcattgcgtgctcggagggtgcatcttttatcaagagactaacatta
+ttttcaacgacgtacatgctttacaatagggtacttatcaaacgccgagaaacgcgccta
+tagtgatgttatgattatgacccgatatccattggaccgaattttatgtaggttcccagc
+gtactcgcgtaatatctcggtattgccataatgtaatacttgtcggtctctcccagatga
+aaaagcgttacagagtatttcaatgaaaaacagcgcgcaacgtcaatacctttaggggta
+acggccgctgatttcatatagatatacgataagttggtatagctctactaggtggcatcc
+acaatcgttgcatttactatagctggttacaatcataatctataccgttccttacatact
+accatagcgggatagcgtttttttgccgttgattgggtttaagaggatgtcagtctcatt
+atatccgattcggtgggagagccgttgttttcaaatcgcacactttgtgacataatgtac
+aagataacaaaactgatataagatataaactgtcaatatcaccttgacacttgaatcaaa
+gtaaattaactcgcaaatataatttgactaattgggtgcagatttctcaattaataaaaa
+aatggcaccggatgggcttacaagccccttatcattcacttgtatcatgatttccaagaa
+caatagaatttgctagcaagtatgaacagagattcgaattgcatccacagtacgccggag
+cgtttattttaatgtggatatgacgatgtactgttggcggcatttgctagtaaccggtcc
+ttatttacgtagcgcacacgtaagcatgtctgggagaaatatggtggtacaatctcagag
+aaagattacagtttggtttaaataggacttatcgggtcggaagtggaacttaataagcag
+tacacaattgggcaacagacgtcttgcctattacaataggattacaatgcgttagatttc
+agacacgttcgtgtttggctattcgtcaattccctaaatagttagacgatcaactattat
+caaagtgattctttgttcatcctccattcatgtaacagatggcacactacgcataacgcc
+gaggaattttaacgagatttaagagagcagttcgggcacaacccacttgactttataaca
+gctcggcagcataaacggtaatatgtgacaaatttccaaacgttataagaacgtatgtgt
+acttagaaaactaagtggttcatgttcaacagatgtgacgcagcaagcctaacttatcta
+ttggttttgctataaaagaacaaagttacacagaatcctaagggcttgtttcacacttat
+gcctagtgcttcaccatcttaaaatagcgaaaccggcacgaatcaaaccttaaaacaatg
+cgcagatattggtgatggtgactccgggtatgataatggtaactgttgaccagcgcccac
+ctcatcgaagtatagaaagtggttaggataaggatgagaccgaacttatttccggccata
+actttagattttctacctagtacacaacatcagggcggacacgaaaccgccatcacatca
+tataccaggtttaatttgcttaatgggggaagtgtcaacgaaccttcgaactttagcagg
+catatggccattatatatggccccagagcagaatgctacagcagacaaaatttggattta
+tgtagtttaatacctatcaaacttggtgtgaccatacttgtctaacgacagtgcacaaag
+tgtaagttacaattattactactcagcagcttctgcaatgataaaatcttatcatacacg
+tcacatatgataatatctacttagggggaacgggctccacaacctacatagtactcaata
+cttacactattcgacaggcacaccaaacctgtacagtcccaaaagattgagtcaactttg
+cagtactgcagatcacagtaatagcttagttagcgagtcaaaattagttttctacgagac
+tgcacgaccgtgcaaatttccgatgtgttggctacaaatagcaacgtatgaatttgtttg
+aagccacgtaaactgtacaaccttagagataagtctcaggctactaaaaacacgttgtgg
+cactaacaggatcatggttgattcttacttattcggctgaccggcccaataagtaacctt
+caactagaacagaataatcgggagtagtttaattcagtcaaggtgcaggtctcattgtaa
+ctaacaagctctgtgtaaccaagttaaaatcgttttcttagcggattccctacttatgga
+tttgagctcgtccacaatattcgatacaagaagtttgtggtccgtaacaacgaaatttta
+attacgctgtgcagcctcatccaaggaattaatagaaggttgatggtaggctccgaacgc
+tccatgattataatcaagtggactgtgcagtaaacgaggaaggtatcctgacgtcgtggt
+gttcgtttttgttatttgtgccctatacgagtagataaaccatgaacagcacagtgtgaa
+cccatggttgattttaggctaccttatttttaatttccgttacacagaaacgaattccac
+aactaacatgccattaatttttcgatatcttataaaagatggtcgaaattcattcattta
+ttttttttcggttctcgaaagtcaactaagctgtcgcgttttgtttctctttagaggtaa
+aagtggctttgatctcctacgtttggatactagtcaaccattactccatttgatccgtga
+gtatcacctgtctaacatccagcattatgactcctcggcgaagaaaagacacacttctta
+gagtcgatgtgtattagctagggacacagttgtttaatacgatagtgagcccagggaggg
+cagtgcgtcccccagtagatttattcagctagtgtaagtataagatatctcacccacgag
+gttcaagtgatatgcagtcttagaataatacttatcctgaatttcgatattatgggtact
+tcaataatccgctagcgctactttatgtctcgttggacagcaggacacatggcagtctta
+aacactaaagacatcacctgaatgaatgtaatgggattacaagaatcaatgaggtattat
+atacgacgtaggaaactctggatatatacagtaatctagttacgccatcgcacttcattc
+ctctggaaacttagaagacatcagctgtacgtggaggaaccagacccccgtatgtagcca
+aatagaaccaaagttgcttatacaaacacacccaatgacaatggaccgctggagttcgta
+aactcggaacgtagtactgcacaaacccagcatttagcaataggagctacgtatgcaact
+cccacgtggtaataccttcaagctatcaatatataggtgcctagctaatcgcattcgcaa
+gcagtattcaagcttgtaaaccagtataataattacagaggctctatgaaacccaacttt
+ccagctaaaagtcccaattaaatggttatttcgtacttttaaagtcgcccgttctgttat
+tacgcgaattgattctactccaaaattaaacacaaattatcaaccgtttcatttatattt
+gtcaatgcagctgtttaaaataaggctctactaaattataattaagacacttattaccag
+atttctctagttaagtttgaaccagctcgactaccgcgaaagatacattcccttctctat
+ttttcagttcatctatgggtcagagaagcattgaatttattctattcaccctcgtcgttc
+acagcgaatcgtcagtgtgatcagtgtatgagaaatatcctaaaccgtttagtcagacca
+cacgcttagaacaagtggtctaaaaagactgccctggaaggagtaagaagtatacagctg
+atccggtgtatccttcagtcatctgccctatactaattacacgacgcaaggaaaaatagg
+tttattttctaggcaaacccttcataggtgactccgatgtgttacgaatcatgcttgaga
+atgtgctatcgttaccgacggataataacgatctccaatgaaccaaatgtagaatgtcta
+ttgattacccttttactattcgacttagagataggagatagaacctcagtgtactttttt
+agccgaatgggaatctttgggaggtgaatggccataaggtcgtaaatccaaccctcttaa
+agtcttccatattatatcgttgttcgtggaatcgataacagatttgttgacccatagtaa
+atgtatactagtttatgttgtaagtgtagattgttttccgattgccgtccaaactttatg
+tcgtaattgtagaccagtaaagttgaccaaggtaagtgcccagcgatcctgcgagatcga
+tcgccaatttttccagtcactgtaagtgtaggtttagataaagccgtatgagttatatca
+taagggcctcggaaagcagcttcgaaccaaagttcccttataatagtagtttaactataa
+aagtatatactggtctgtcgccctttcacgatttgttttaccggtttatgaagcgttacg
+tcattagagcggctccaatttaaggttaacggcttccatgtgtagttgtatacaaggata
+acttaaagtatctgttcagcgagctagttaagttatcctcgatagaacacaactcagagg
+tcccaagatcgggtttgcaacttgctaatttattctcaaggcaaattgggaattatcgat
+acctgtataccataaggtcgctcgatgtgatgcttatgtcttctggtgatcctaccttag
+ttagtgctgattaacggaacattaatgtttatcgttttgagatttagccaattctctgat
+tctaactcaagatgccttatctgacgtgctatgcagcccctaagtattttacattgtaat
+aggacacgctcctttaaaactcgccaaaaggtcgttgtggttctctactggttaactata
+taatttacagctttgttgagctagttcctctttggtttaagtcctcaatattagttggtt
+cgagcgataagttggctagttaccttagtcactatattagatccgaatgttatgcttcat
+ctgaagaccgccaccctccaaaatttcttttaagactcacttattgcaaggtgtaggtga
+attcggctcgtttctcaagtggtgtatctgtacacgagtttccatattttcatcaacagc
+caccgcacacttatgtcactctaggtattaaaagtcgctctacaaggggacgcaattaag
+aaacagacatgctagtcaaaaataaacatagcgaggcaccactaattcggccgcttatca
+atgggatgctctgcgcgagacgcgccagagctcagtagttagttcggacatacatttact
+tcagatgatcaattagttttctacaaatgcttactctaccccgaaaaaagtcaccagact
+cttacgtctctttagtatccttccgtcttatataaggtcagtcccccgtttcggtaccct
+ggaatttactaagaataatgaaacagcccccaaggacgtacgtttacaaatgatagacca
+gatcgcctagcttattccgacgcatgttgcatagaattgaaccaacggaatgtgagagta
+actagatgagccgaccacagcacccgtttgcgtcgcagaatacgcctgatagttcggcca
+cgaaatcatatgtcctttgagtattaagtatttgtaatgatcaatcgagctcaagcaagc
+ttacacttcctcggatattcagggaacttagtgcctttgaaagatacgttgatcaacgaa
+aaattgataatggctcatatggaatgcctacctcatagtgctgaattaacacagcactgc
+ggacctaacttttcgaggtttcaagttcacgtctcaaaacctaataggctggaatatgta
+gggatcctcggtgaatttgtgattgggtttgttgtagtactgaccaagtgaatattcttt
+ttttctaaaagcagatctgctgccgggcactacgaaggagatctctgtgtatcattattg
+cttcttgacatgatgactcttaaatcactgtgggtgtgcaaaacgatagcacaacccaat
+tcgatagtacatattgttgatacttcgcactaaaccgttcatatttaaaggttgtgctcc
+ttccttcgttaaatactggtgacttggtcctatctactattagctagacctctggggaac
+cacgcccccgtaaaacctgtgcaagagagggggtcatacatcttagacatcgcgcctcca
+ccagggaagcattgggtgattgaccaggtgtgtaacaaatatgattattcttatactaat
+attagcaaagatgcataatgatttgtattaaatgtataattgaattgataagggtctttt
+agtcagtgatagagtagtataaggtagacattagaactcttaaccggacgcagatttttc
+ggtcttagtaagccaattagtcgacaaaacaaggtaagagcggttactagtagtacctat
+aatgcactgaatcttcggtcgaagtatagttctaatgctatgcagattgtgacggcgaca
+aatgttcagacttatatcatgaaacaagctcttgtaagtattgacaaatgaaaagattga
+atatttttaaatacaaaatgcgcctacttattaggggaattaaccagattgaaggccaat
+cctcacatgtaatgagataatagacgataaatgaaattcttgtaatagttgaactgctac
+gtgatgggtattatatatgattgagatcctccaattgccgacgtcttgtcttgatgccca
+aaagattgtcaacgaggagctccctcgcgtacctgtcgtccgtatcataaacgacgcgac
+atgtacagcactccgaagtataagcaataataatgcgggtaatccagactagatcttttc
+ggactcaatgcggtttcacggtaaacatgattaataccggagagtagtcgagcttatcag
+cgatgcaagcgaattcattgtgccaggagatacgttgcagataaaaccggcaacgtatgt
+caacaagttttggcgatctcgttgtttgtattcgacgaggcgcgggaacttcaagaacta
+tcgtatattcaagtccattaccttttagtttcagactggtggagctgactaaagttatat
+catcattttgtacactggtttagttaacgataatttcagatttaacatgaccagacgata
+atcgctgtatatccagttggaatgtggtttgccagaaaggttaacttataatcaagcctc
+tcttcagtcttgattcgtcgtatcccatccattgcgctatacctcagtgtatttggagct
+gtagttataccgtgtgctaagatcagtagacatgacgagagcaatattatctaccttaca
+agcatcaacggacgtctagtcggaacaaaagactctaaaactcgaacttcaggttaatat
+actatagttctgtattcagcagttattcttatattcgatattatcttgcctattggatgt
+ctgactttagtatattaatcatagtatctgccatgtaaaggtgccagtactaaatctgtt
+tcacagtgcgaattataaacggttacaaccattaaagacaacaagaccctatagctttat
+ttgaattttgtcaatgcgcaacttggagctcgcgatacatcccaattagtctatagggtc
+gggacgattctacggcatttctggttataatgacaacatggattgtggcccgagaatcgc
+tctttcattaattaagcaatcattacagtcttataagcgctacttccgagtggtagcagg
+taactcgatataaggtcgcatgagccgaatagcttaaaaaacaggccaccgaacattgat
+agagaataccgaccacagcgcaacctttgattactttcattaaattgtacggctcactcg
+acatcaagcttaagattgcgataatgtgaactcaaatggatcagtactgaagaaccgtaa
+cccacttcgcagaaagcgtacccagagaagatacgctgttacaatatacagggtgaaatt
+attgcctgttcttcgtaaccatttcgccaaacttggttagaaatgatagccattcatgat
+agaaataagctgaatgataccagtatctttaactatgtagtcagggggaagataacgatg
+gtccatgtatgtttctgatatgtgacagtattggccgcgtaatttgctaacgaagctact
+taatgcctttgagcttcatatagatttctttaatcaaaatcggcaaaaagatagtatgag
+ctataatatatgctagtagagaactctggaccatcatctatatgaatactgattcgagcg
+tgcaattactttagcctgcgtactactgactctacaaaacactctgagataagtttgtag
+tcagtaagtcgctctctataaaccttttggatgaccattgtacagccacttatagatccc
+aataaatagcacaggagacagagtttttcaatgctcgatcatttgccgatagtattttcg
+tctaacctcagggcacctattatttgatacctaacctaacggccctttcacaatggagaa
+atatatgacatcgggacaaacacaaatggtgggtggccaggagatatgacatggtggcgt
+ctctaagaaacacggactccctctaggcaaactcacgtaaccaattttaatgtcaaacaa
+aacgctcgaaaagattttgccgtgtaatgacctggtacattgactggtcaggaatacatc
+actgtagttgccgtagtgtcctgttggtgttccatcaagacacatcgtataacgcaattt
+acgacggacatcagatcaagttatacagattatttaagtatcacgtgtgcattgggacat
+aagggatctcacacatgccttggaacatttttgctttgtgccgctttttcgctgcactac
+caatccttacttaccagtatattcaaaggtcgttaacagaatgagaaaggttagggctct
+aagttatcgtcgattgggatagacgagacatttgcgagcgccctccacggatacgaatct
+cccatatcaatgtgaactggatgctatgcagtttagttcttacgtctcctagtggtaaaa
+atcaaagtagcactcgcatagcagttattcagaacctaatacacaaaaccgtcaaacatt
+ttctaattctaggtatgggccgatcataggagctaaggtgaaactcataaatgttttgtt
+agatctagcatcctaaaaagatgcatatactgagtagctggcgtgcattctctcaattgt
+atcctttttaactgaactagtcggtcccatttcgtgactgagatctattaaccgataaga
+ttaataacactcgcattcgtatcagctcagagtgaagtttttcaataatttgactgatat
+attaacttctaaaataaccctttaagcctcggatccgtttcccaatcacatcaaaaattc
+ttattccaactatctacggattaacaacgtgcatggggatcgtagtaagaacttgttccg
+atcactttgagtatatcaagttgacggcccggttattattgaatagaaacattcacctgc
+taaattaaataccgcacatcggatacccgatttcagagggccgtcttactaagggcaggc
+tttgttcggtttaactgagatgttcattattttacagtatgcttcaactaatatgtaacg
+aaggacagtggatctgtctccatagtagatcttcagtcgtgaatttcataccgctcctat
+ttaagttcgcgttcgagttgttgatcatggcacgtgaaagcaacccctagtattctagac
+gaaaattttttctagttcatctgataatttgccaattcaaaaacaaccgctggtttcccg
+gcgcattctctaaaatggaagtcgaacctagagccattatttgtcggtaacccatgagtt
+ccttcttttcagaagttaatacactgtggtcctatacagaggaaaaacagcggttatata
+cgatcgtggcataacaacattggatcaagatagcaatttggctacctattctaattctca
+ctagattcggtattccactacaatatcggcagattaggattggatgaataatcggtgttt
+aagtccggttgcgtctccaatctcctaatttttattaatattgatcttggtgacctattg
+taaataaaaacttcaagactttgaataacggtgaaaagatagaagactcatttgaaaatg
+gatcatccacagatccaaacattagcaagacactaatccccaactagctattctgatcgc
+gatcgtgctgcagtactcctgtcacaatagtctgttcatgatctaattctttttgggctt
+tgttcgatggtgattcagaatctttatccggtcgcttccctgtagctactttgtggggat
+attgcccggggattatagggttgagatcgtttcctaaaagtatttaaaccaagtagactt
+caactaaactacatcagaacatcgtgaagacaccatacgcggtacctttatttaccgata
+acatttcttcaagaaataccggtaagcagcataatgaccctaaacagctcggggtatcgt
+cgtagttttaaattttatttaggttactgctcaaggaataaaaactaactatttaattta
+taataatattacaaggctcacactgattagatttgtctataagacttcgcgatcccccat
+taccggattgtcttaagaataaactagataaaccatgcattttctagataaggcctttag
+tctaattagatacaaaaaacacgatagttgcatccttaatttattgtgtcaaacctggaa
+ccttttaattacccgcaaatcactttatgtcgagactacctctgaaatttattatctacc
+taccgcatgaggacttgaaccatcttgtaggagttatgtttattagctaagattcgttta
+tcctgtagcggtccatgtatattcaacaagcaaaaagcactcagaattgtttttagttga
+gtcaagactgatatataaataagtttccctagttttttcgtggtgggacgatattgaatt
+gaatcttaaccgaagagtttcccactctgtcgcacaataatacacgccaatatttccagc
+cctgcttatgccttaatcggttactcaatctcccattgaagttcattttgatctgcatag
+aagtttcgggcccagccttttttctgccaccttcctccaagctctgtagacgcactctaa
+gattgatgctcacatgtattaattctacattaacataaatatataagtcatgcatcttcg
+agtaaaatatctggttctccaacatgtcctggcacgtatcgttataatgcccatacatgt
+agtattaaaatgattgggttaactggatattaagatcatcgaaattgtaaagtcaaatta
+acaatactgtctcaagaccgtgtattcctcgtgctcggaagggctattacgcttacttcc
+gttttggtatcttaatatgactttcaaaaattaagttgcagtgagtcctacctgcgtgca
+tcggttagcaagagtataaaagttgtttaaacgaactacttgctttacaataccggtcgt
+atatatcgccgtgaatccagaagattgtcttctttggattatcaaccgagatcctgtgga
+ccgatgttttgggaccttcacagaggactccaggtagagctcgcttttgcattaatctaa
+gaattgtacctctctaaaagatctaaaacagtgaatgtgtatttcatggaaaaacacaga
+gaaacgtaaattactttaggccgaaaggcacatgagttattatacatatacgagatggtg
+gtatacatcgaattcggggcatacactatagttgcattgtatttagctgctttaaataat
+atgatattaccttccttacataagacattaccggcataccctggttttcaacttgtgggg
+ctttttgacgatcgcactctcatttgatccgagtagggcggtgacccctgcttttcaaat
+acaaaaatttcgctatgaaggtaatagattacttttcgctgttatgatagaaacggtaaa
+tttaaaattgaaacttctagaaaagtaaagtaacgagaaatgattttgtgaataatgcgg
+tcatgattgcgcaagtaagaaaaaaaggcaaaaggatgcgcggaatagaaacttatcagt
+cacgggtatcttgatttcattcttcttgtcaattgccgacataggatgaaatcagattcc
+aatgcaatacacagtaacccccacccttgattgtaatgtcgatttgaagttgtacgcgtc
+gacgaagtggatagtatacgggccttttgtacggtgcgatcaactatgaatctcggcgag
+ttagatggtcgtacaatctcacacatagaggtcacttgcctgtaatgacgaattttcggc
+taggtactcgaactttattagaagtaaaaatgtgggcaaaagaaggattccattttacaa
+gacgattacaatgagttacatgtctctcaacgtagtctttccctagtagtctttgaacta
+tttaggtactccagaaaattttagcaaagggtttctgtgtgaatccgccattcatgttta
+tgatggaacaataagaataacgccctcgtatgttatcgacagtgaagtcagcagttcggc
+caaaaacatattcaatttagtacagatccccagaagttaagctaagtgctctaaaatggc
+ctaaacggttatcaaagtaggtctaattactatactaacgggtgcatcgtaataactgct
+gtcgatgcaacactatatgatagtgtcgttttgctatatatgtacaatgtgacaaagaag
+ccttagcgattcttgcaaacttaggacttcggattctcaatcttaaatgtccgaaaacgc
+aaagattcaaaaatttaatctatgagcagatatgcctgatggtgactacgcgtatgttaa
+ggctaaatgttgacaaccgcacacataatcgaactattgatagtcgggagcataaccagg
+tgaacgtactttgttcacgacatttattgacatgttctaaatacgtctcaaaatcacggc
+gcactagaaaacgcaatcaaatcattgtcctggtttaagggccgtaatgccggtagtgtc
+aaacttcatgagaactttagctggcttttggccagtatttagggaccaagagcactagcc
+ttaagctgaatattttgccatttatctactgttataactttaaaacttggtggcaccaga
+cttgtcgatacacacgcatcaatctgtaacgtaaaaggtttactaagaacaagcgtagga
+attgagtttatattatatttaaactaaaagatgatattagcttctgagggcgatagggct
+ccaaatcataaagaggaatatattattacacgattagaaacccacaacatacctcgaatc
+gcccaaaagtttgacgaaacttggcagtactccacatctcagtaatacagttgggagagt
+ctcaaatgttgttttattactcaatgaaccaccctcataatttcactgctgttccattaa
+atttgcaaacgatcatttgctttgaagaaacgtaaaatcgacaaaattacagataagtag
+atgcataataaaaaaaactgctcgctataacacgatcatcgtgcattcttacttaggagc
+atcacccgcacaataacgtaccttaaactacaacactattagaccgagtactgtaattca
+cgaaagctcaagctcgcattgtaaagaacttgctctctcgtaaaatgtgataatagtttg
+cggagaggattcaattattttccattgcacctactccactagattcgataaaagaaggtg
+gtcctcccttaaaaagaaatgttaagtaacatcggaaccataagcaaagcatgtaagtga
+accgtcatccttccctaagaaacataaaggtttttaataatgtcgactgtgaactataac
+tgcatcctttcctgacctactccggttccttgttgttatttctgaacgagaccagtagat
+aaacaatgtaaaccacagtgggtaccaatggtgcatgtgacgctaccgttgttttaagtg
+cccgtacaaacataagaagtcataatcttacttgaaattaattttgccttttattttttt
+tcaggctcgaaattaatgatttgttttttttgaccttctagttacgctaatatgcggtcg
+cctgtggtttctattgagtcctataacgggatgggatctaatacgtttggttactagtaa
+acaaggtataaatttgataccggagtatcaactgtataacatcaagctttatgactcata
+cgcgaagtaatgacacaaggctttcaggagatcgcgagtacagagccactaaggggtgta
+ttacgatagtgacaccaccgagcgcactcactccccaagtagatttatgatcctacgcta
+agtattagatatataaccaaagaggttctagtcagtgcaactcttagaataataattagc
+cggttttgcctttttaggcctaatgcaatattcagctagcccttatgtatctcgcgttcc
+acagcaccactcatggcacgcgtttaaactaatcaaatataatctatgaatgttatgcca
+gtacttgaataaatcaggttttttataagtccttgcatactctcgttatatactgttaga
+gtcttaccccatagaaattctttcatctgcaaacttagaagaattctcagctacggggag
+cataaagtccccaggatgttgacaaatacaacaaatgtggcttatacaaacactccatat
+gaaaatcgaaccctcgtggtagttttagccgaaccttgtacggataaatccctccatttt
+ccaatagcagatacctatcctactacctcgtggtattaaattaaagcttgaaatatagag
+ctgcatagcttatccaattcccaagcacgagtctaccgtcgtaaccacgatttgatttac
+agacgctagagcaaacccatctttaaacatataagtaaaaattaaagggtgagtgcgtac
+gtgtttactagcaacttcgcttattaagacaattgtttataagccataattaaaaacata
+tgttcaacaggttcattgatatttgtaattgcacaggtttttaataaggatctacgtaag
+tataatgaacaaactttttaccagagttatattctgtactttgaaaatgctcctctaccg
+ccttagagactttcaattagattttttgcagttaatctatgcgtaagtgaaccatgcaag
+ggatgcgattcaaccgcctcgtgctaaccctatcgtctgtctcataactgtaggtctaat
+ataattttcagttttcgaacacataaccctttgaaaatctgctatttaatgtctcacctg
+catgcactatcttctatactgctcagaacggctatacgtcactatgctccaagtgacgat
+ttaaacgaagcaaggaataataggtttattttagtgcaaaacaattaagtgcggactacg
+tgctctttacaataagccttgtgattgggctataggttaagtcccatattaacgatctcc
+aatgtacaaaatcgacaatcgctttgcattacccggttactagtcgaattacagatagct
+gttagatactcactctaattttggacaacaatcccaatcttggggtcgtctatcgcctga
+agctcgtaaatccttccatcttaaacgattacatattatagacttgttcggggtagagat
+atcacagttgtgcaaacattgtaaatcgatactagtttatgttggtagtctagttgcttt
+taccattccccgaaaaacttgatctactatttcgacaacagtaaacttgaactaggtaag
+tgaaaacagagaatgcctcatagtgccactatttgtccactatatgtaagtgtagcttta
+cataatccactatgactgagatcattacggcctaggaaagcagcgtagaaaaaaagggcc
+cggatattacgactgtaactataaaactagttactggtagcgcgccatgtatagatttgt
+tttaccggttgtggttgcgttaacgaatttcagccgcgaaaattgatccgttaaccagtc
+catctcgacttctataaaacgataaagtaaagttgatgttcagcctccttcttatggttg
+catcgagagtacactactcagtgggaaatagatcggggttcctacttcagattgtattat
+ctaggcaattgccgattgtgccatacctggataaaataagctacctacatgtgatgctta
+tctattatcgtcatactaccttagggtgtcctgttgaacgctacattaatctttagccgt
+ttgagatgttccaatggataggagtctaacgcatgatgaagtttaggaaggcagagcatc
+ccactaagtatgtgacagtgtatttcgaaacgagacgttataaatagaaaaaaggtcctt
+ctggttctattctgctgaactattgaatggaaagattggttgacctacgtactatttgct
+tgaagtcatcaatttgacggggtgagagacatatggtgcatactttacggactctatatt
+ttagatcagaagcttagcagtcttctctacaccccctcacgacataattgcttttaagaa
+tctatgtttgattcctctacgggaattcggatccgttcgcatgtgcggtttatctaaacc
+aggggacatatgttcagctaaagcatacgaacactttgctaactagacgtatgtatagta
+gctataaatcccgacgatatttacaaaaagaaatgagactcaaatatatacatagcgacc
+ctacacttattcgcaccctgatctaggcgatcctagcacccacacccgaaagtgagcact
+agtgtcttccgtattaaatttactgcagttgagattttagttgtctactaaggattactc
+taacccgtaataaggatcaagactcggtactagctttactatcattccctatgtgttttc
+ctaactcacaagggtacgtaccagcctatgtaattacaataatgataaagacacaaagga
+agtaactttacaaatgagtctccagttacactagcttagtccctcccatcttgctttgaa
+gtctaaatacgcaatctctgaggatatacagcagaagaacactcataacgttggagtcca
+agaattagactcatagggcccccaacatttaatatgtactgtgagtttgaaggtgttcta
+ttgttaattcctgctcttgatacatgacacgtactccgtgtttaaggcttcggactgact
+ttctttcataagttgagcaacgaaaatttcagaatcgataagttggattcactaactaat
+acggctgattgaaaactccactccggacctatatggtcgacctttatacgtaaccgatat
+aaaacttataggctggtatatcgagccttcctagcgcaatttcggatggggtttcttcta
+ctactcaacaacggaatagtctttgtttagtaaaccagagctcaggacgcccaatacgta
+ggagagcgctgtggagcatgtgtcattatggactggagcactcttaaatcactctgcgtg
+tgctaaacgatagatcataacatgtcctgagtaaattttcttgatacgtcgcaatatacc
+gttattagttaaacgttctcatccgtcatgcgtgaaatacggctgtcgtgctcagatata
+ctattagcgactcatctcgcctaacacgcacacgtataaactcggaatgactgccgctct
+tacatattagaaatacagactacaccacggaagcattgggtcattctcaaccgctgtata
+aaagatgattagtcttataataagattaccaaagaggcagaatcatgggtagtaaatcta
+ttattcaagtgattaccgtcgtgtaggcagggagtgaggacgagatggtactcaggacaa
+atattaaccggacgaagtggtttacgtcgtactttcactattagtagtaaatacaaggta
+acaccggggaatagtactaaatataatgatatctatcttcgggagaacgagtcgtctatt
+gctttgaacattctcaaggcgtaaaatgtgctgacttatagcatgatacaaccgattgtt
+acttttgtctattcaaaagattgaatagttttttatacaaaagccgcatacttatgacgg
+ctagtatacagtttcatcccctagcatcaatgctatggacagtattgaacttataggaaa
+ttcttctaatagggcaaatccgtcgtgatgcctattttttttcagtcacatcctcaaatg
+gcactagtattgtcgggatcccattaacaggctcaaccacgagctcacgcgaggacatgt
+agtccgtatctttaacgaagcgacagcgacagaactcccatggataaccaattataaggc
+ccgtaatcctctagacatcgtttaccaataaatccgctttctccgtaatcatgttgaata
+ccccagagtagtccagatgataaccgatgaaacacaagtctttctcaatgcacttacggt
+gaacttattaccgccaacgtagctcatcaaggttgcgacatctagttgtgtgtttgcgac
+gagcccagcgaacttcatcaactttcgtatattcaacgccttgtaattttactttaagac
+gcctggtgatgtagattcttagataatcagtttgttatcggctgtactttaccataattt
+cacaggtttcaggtcaagaagattatagctgtatatacagttccatgctcggtgcacaga
+aacgtgatcggataataatcaatcgcttatgtcgtctttaggcgtatccaatacatgccc
+cgataccgcagtgtatttcgacatgtaggtataccgtcgcatttgagctcgagtcaggac
+gtcagctagattagattccttaatagaatataccgacctctagtccgaactaaactatag
+ataacgccaacttcaggttaattgtctagtcgtctgtttgcagatgggattcttagatga
+gtgagtatcggccatattggttcgagcactttagtttttgatgcataggatatgcaatgt
+atagctgaaagtactttatctgtttcaaactcacattgattaaaccggtaaacctttaaa
+gactacaagaaaatattcagtgagggcaattttgtcaatcacaatcttccagctagagat
+acttcacaatttgtcttgaggctacgcaacattagacggattttcgcgttttattgaaat
+aatcgaggggcccaagagtatccatagttcattttgtaagatttctttacaggcttatta
+cagcttcttcagactcctacatgcttacgagttatatgctagcatgtgaacaatagatta
+atatacaggaaaacgtacattgagagagatgaccctacacagcgcaaccgttgagtactt
+tcattaaagggtaacgctctcgagacagcatccttaagatggccttattgtcaaatcatt
+tgcagaagtacgcaagatccctaaccaacgtagaagaatccctacaaacacatgagacgc
+ggtgaaaatagacagggtgttagtattcaatcttcggagtatcaatttcgccaatcttgg
+tgagaaagcataccctttcttcagagaaagaagatcaatcataacactatctttaacgag
+gtacgcacgcgcatcattacctgcctccatggatctttaggatagcggaaagtattggca
+gcgtattgtgatttcgttcctactttatcaatttcacattcatatacatgtcttttatca
+aaatcgccaataagataggatgagctatattagatgctagtagagttcgcgccaacatca
+tcgataggaatactcaggacagcgtgataggacttttcaatccctaatactctctataat
+tataactctctcttaagtttggaggcagtaacgcgctctatataatcagtttgctgcacc
+attcttcagcctctgatacatacaaataaattccacagcagtaagagggtttaattgaga
+catcttgggaacttaggattttactctaacatcaccgaaacgattattggataccgtacc
+taaacgaactttctcaaggcagtaatataggacatccgcaataacacaaatgctgcctcc
+ccaggagttatgtcttcctggaggctatatcttacacccactcactataggcaaactaaa
+gtttaaatgttgattgtctaaaaaaaagatagataagagttggccggcgtagcacatgcg
+aaagtgaatcgtaagctataattctctggacttgaagttctgtcctgttcctctgcaaga
+aacaaacttcctttaaagctatttacgacgcacatctcagcaagttataaacatgttgga
+agtttctagtcggaattcccaaagaacggatctatctaatgcattcctacatttttcctg
+tctgccgatggtgccatcctattcaaagaatttcttaaaagtagattaaatgggactttt
+aacaatgagtaaccttacgcctctaagggttcctcgagtgccatacaccagtcaggtccg
+agccacatacacggagaacattctaacatagcattctcaactcgatcatttgcaggttac
+ttctttcctatcctagtgctaaaaatcatacttgcaatcccatagcacggattaagaacc
+taagaaacaattcagtaaaacatgttcgaattcttggtatgggaacatcattgcagctat
+ggtctaacgcattaatgtttgggtacatcttccatcatataaacaggaagagtctgacga
+cagggagtgcttgcgatcatgtctatcattgtgaaatcaaattgtagctcacatgtcgtc
+tatgagagcgtgtatccgataagatttagaaaaatagaagtcgtataagatctcactgaa
+cttttgaatgaatgtgaagcatatatgatctgctttaataaaactttatccataggatac
+gtttccaaatcaattcaataattattagtcaaaatagataaggatgaacaacctgaaggc
+cgatcggacgtagaaagtggtcccatcactttgagttgatattgttgaaccacacgttat
+tatggttttcaaacagtctcaggatattgtatatacagataatccgataccagttgtctg
+acgcccctcttacgtaccccaccctttgtgacgtttaaagcagttgttcagtattttaaa
+ctaggcggcaactaatttggaaagaagcacagtggatatgtctaaattcttgttattcag
+gcctgaatttaatacaccgcatagttaacttcgcggtagagttgttcatcatgcctcctc
+taagctaccacttctatgatacaccaatagttgttctacggaatctgataattggccaag
+tcataaacttccgctgcgttcaacccccttgctcgaatatccaactcgaaaagacagcct
+tttggtgtccggaacaaatcagttacttcttttctgatgttaattctctgtggtcagata
+cagaccaaaaactccgcggatttaccatcctccaagaacaaatttgcatcaacatagcat
+tttggctacatattctaagtctcaatagtttaggttttcaactacattatcccaacatta
+ggattggaggaataatagctgggtaagtccccttgcgtctacaatcgactattttttatg
+aatatgcttctgccgcacctatggttattaaaaaagtcatgactttgaagaaccctgaaa
+agatagatgaatcaggtgtaatggcagcagccaaagagcatataattagcaacactctaa
+gaacattatagatatgatgatagcgatcgtcatgatgttatccggtcacaatagtagctt
+catcagctaattcgttttgccagtggtgacttgcgctggaagaatcgttatacggtccct
+tccctcttgatacggtgggggcttattcaaccgcgtggattgggttgtcatacttgcatt
+aaacgatgtaaaccatctagtagtcaactatactaaatcacaaaatagtgatcaatacat
+acccgcttcatggttttaaccatttaattgattaaagatattccgctaagaaccattatc
+tacctaaactgatcgccgtatcctagtagtttgaaatttgatgtaccgtaatgatcaacg
+aagtaaaacgttatattgtatgtagaataataggtcttggagctaaatgatgtgattggt
+agtgaagacttacccttacaactttaccggtttctcggaagaatatactagagaatcaat
+gcatgggctacataagcactttagtctaatgagataaaaaatacacgagtcttccatcat
+gaattttttgtcgaaaaactcgaacctggtaatttaaaccatatatctttatgtcgtcaa
+taactctcatatgttttatataacttcccaatcacgacttgtaactgcttgttcgactga
+gctgtttgagctatgaggccgggatccggttgagctacatctatttgctacaagaaaaat
+gaaagcacatttgttgggagttctggctacactcatagagaaataagtggcccgagtggg
+tgcggcctgcctccatattcaagtgtatcttaaaccaagtggttccaacgctcgcgctaa
+agaattaaagcctttatttcctccacggagtagcccgtaatccggttcgaaagagaccat
+tgaagttaattttcatatccagtgaagtttaggcacaagcatgtgttctgccacatgcct
+caaagcgctcttcaaccaagatatgattcatcctaacttcgatgaatgcgtctgtaacat
+aaatatagaaggaatgattcggcgagttaattttcgccttctccaacatggcatccctac
+gttcgttataaggaccatacatgtaggttttaaaggtttgcggttaatcgatatttacat
+catagaaattctatagtcaaatttacaagactctagatactcactcgttgcagccggcta
+ggaagcgctttgtaccttacttcccttttcgttgcgtaatatgaatttcatatagtaagt
+tcaaggcactcatacctccgtgaagagggtagatagactattaaagttgtttaatagtac
+gtattgatggaaatgacccgtaggagatttaccactcaatccacaagattcgctgctgtg
+cattatcaaaacagtgcatgtcgaaacatgggttgggtccttcaaacacgaatccaggta
+gagatacctttgcaattttt
diff --git a/examples/regexdna-output.txt b/examples/regexdna-output.txt
new file mode 100644
index 0000000..d36baa5
--- /dev/null
+++ b/examples/regexdna-output.txt
@@ -0,0 +1,13 @@
+agggtaaa|tttaccct 0
+[cgt]gggtaaa|tttaccc[acg] 3
+a[act]ggtaaa|tttacc[agt]t 9
+ag[act]gtaaa|tttac[agt]ct 8
+agg[act]taaa|ttta[agt]cct 10
+aggg[acg]aaa|ttt[cgt]ccct 3
+agggt[cgt]aa|tt[acg]accct 4
+agggta[cgt]a|t[acg]taccct 3
+agggtaa[cgt]|[acg]ttaccct 5
+
+101745
+100000
+133640
diff --git a/examples/shootout-regex-dna-bytes.rs b/examples/shootout-regex-dna-bytes.rs
new file mode 100644
index 0000000..a763385
--- /dev/null
+++ b/examples/shootout-regex-dna-bytes.rs
@@ -0,0 +1,70 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::thread;
+
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::bytes::Regex::new($re).unwrap()
+    };
+}
+
+fn main() {
+    let mut seq = Vec::with_capacity(51 * (1 << 20));
+    io::stdin().read_to_end(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]).into_owned();
+    let clen = seq.len();
+    let seq_arc = Arc::new(seq.clone());
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let mut counts = vec![];
+    for variant in variants {
+        let seq = seq_arc.clone();
+        let restr = variant.to_string();
+        let future = thread::spawn(move || variant.find_iter(&seq).count());
+        counts.push((restr, future));
+    }
+
+    let substs = vec![
+        (regex!("B"), &b"(c|g|t)"[..]),
+        (regex!("D"), &b"(a|g|t)"[..]),
+        (regex!("H"), &b"(a|c|t)"[..]),
+        (regex!("K"), &b"(g|t)"[..]),
+        (regex!("M"), &b"(a|c)"[..]),
+        (regex!("N"), &b"(a|c|g|t)"[..]),
+        (regex!("R"), &b"(a|g)"[..]),
+        (regex!("S"), &b"(c|g)"[..]),
+        (regex!("V"), &b"(a|c|g)"[..]),
+        (regex!("W"), &b"(a|t)"[..]),
+        (regex!("Y"), &b"(c|t)"[..]),
+    ];
+    let mut seq = seq;
+    for (re, replacement) in substs {
+        seq = re.replace_all(&seq, replacement).into_owned();
+    }
+
+    for (variant, count) in counts {
+        println!("{} {}", variant, count.join().unwrap());
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
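A brief editorial aside on the bytes-based variant above (this note and the snippet are not part of the upstream example): `regex::bytes::Regex` searches `&[u8]` directly, which is why this program can read stdin with `read_to_end` and skip UTF-8 validation entirely. A minimal, self-contained sketch of that API:

extern crate regex;

fn main() {
    // Match on raw bytes; the haystack never has to be valid UTF-8.
    let re = regex::bytes::Regex::new("agggtaaa|tttaccct").unwrap();
    let haystack: &[u8] = b"xxagggtaaaxx";
    assert_eq!(re.find_iter(haystack).count(), 1);
}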
diff --git a/examples/shootout-regex-dna-cheat.rs b/examples/shootout-regex-dna-cheat.rs
new file mode 100644
index 0000000..27c5308
--- /dev/null
+++ b/examples/shootout-regex-dna-cheat.rs
@@ -0,0 +1,92 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+// This technically solves the problem posed in the `regex-dna` benchmark, but
+// it cheats by combining all of the replacements into a single regex and
+// replacing them with a single linear scan, i.e., it re-implements
+// `replace_all`. As a result, this is around 25% faster. ---AG
+
+extern crate regex;
+
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::thread;
+
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::Regex::new($re).unwrap()
+    };
+}
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+    let seq_arc = Arc::new(seq.clone());
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let mut counts = vec![];
+    for variant in variants {
+        let seq = seq_arc.clone();
+        let restr = variant.to_string();
+        let future = thread::spawn(move || variant.find_iter(&seq).count());
+        counts.push((restr, future));
+    }
+
+    let substs = vec![
+        (b'B', "(c|g|t)"),
+        (b'D', "(a|g|t)"),
+        (b'H', "(a|c|t)"),
+        (b'K', "(g|t)"),
+        (b'M', "(a|c)"),
+        (b'N', "(a|c|g|t)"),
+        (b'R', "(a|g)"),
+        (b'S', "(c|g)"),
+        (b'V', "(a|c|g)"),
+        (b'W', "(a|t)"),
+        (b'Y', "(c|t)"),
+    ]; // combined into one regex in `replace_all`
+    let seq = replace_all(&seq, substs);
+
+    for (variant, count) in counts {
+        println!("{} {}", variant, count.join().unwrap());
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
+
+fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
+    let mut replacements = vec![""; 256];
+    let mut alternates = vec![];
+    for (re, replacement) in substs {
+        replacements[re as usize] = replacement;
+        alternates.push((re as char).to_string());
+    }
+
+    let re = regex!(&alternates.join("|"));
+    let mut new = String::with_capacity(text.len());
+    let mut last_match = 0;
+    for m in re.find_iter(text) {
+        new.push_str(&text[last_match..m.start()]);
+        new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
+        last_match = m.end();
+    }
+    new.push_str(&text[last_match..]);
+    new
+}
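The `replace_all` helper above is the heart of the cheat: one alternation over the ambiguity codes plus a 256-entry lookup table turns eleven replacement passes into a single scan. Below is a hedged, self-contained sketch of the same idea on a made-up four-character input (the string and expected output are illustrative only, not taken from the benchmark data):

extern crate regex;

fn main() {
    // Two of the IUPAC codes from the table above.
    let substs: Vec<(u8, &str)> = vec![(b'B', "(c|g|t)"), (b'Y', "(c|t)")];
    let mut replacements = vec![""; 256];
    let mut alternates = vec![];
    for (byte, replacement) in substs {
        replacements[byte as usize] = replacement;
        alternates.push((byte as char).to_string());
    }
    // A single alternation ("B|Y") drives one left-to-right pass.
    let re = regex::Regex::new(&alternates.join("|")).unwrap();

    let text = "aBcY";
    let mut out = String::with_capacity(text.len());
    let mut last = 0;
    for m in re.find_iter(text) {
        out.push_str(&text[last..m.start()]);
        out.push_str(replacements[text.as_bytes()[m.start()] as usize]);
        last = m.end();
    }
    out.push_str(&text[last..]);
    assert_eq!(out, "a(c|g|t)c(c|t)");
}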
diff --git a/examples/shootout-regex-dna-replace.rs b/examples/shootout-regex-dna-replace.rs
new file mode 100644
index 0000000..681e077
--- /dev/null
+++ b/examples/shootout-regex-dna-replace.rs
@@ -0,0 +1,19 @@
+extern crate regex;
+
+use std::io::{self, Read};
+
+macro_rules! regex {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).build().unwrap().into_regex()
+    }};
+}
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    println!("original: {}, replaced: {}", ilen, seq.len());
+}
diff --git a/examples/shootout-regex-dna-single-cheat.rs b/examples/shootout-regex-dna-single-cheat.rs
new file mode 100644
index 0000000..ec444b4
--- /dev/null
+++ b/examples/shootout-regex-dna-single-cheat.rs
@@ -0,0 +1,77 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::Regex::new($re).unwrap()
+    };
+}
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    for re in variants {
+        println!("{} {}", re.to_string(), re.find_iter(&seq).count());
+    }
+
+    let substs = vec![
+        (b'B', "(c|g|t)"),
+        (b'D', "(a|g|t)"),
+        (b'H', "(a|c|t)"),
+        (b'K', "(g|t)"),
+        (b'M', "(a|c)"),
+        (b'N', "(a|c|g|t)"),
+        (b'R', "(a|g)"),
+        (b'S', "(c|g)"),
+        (b'V', "(a|c|g)"),
+        (b'W', "(a|t)"),
+        (b'Y', "(c|t)"),
+    ]; // combined into one regex in `replace_all`
+    let seq = replace_all(&seq, substs);
+
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
+
+fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
+    let mut replacements = vec![""; 256];
+    let mut alternates = vec![];
+    for (re, replacement) in substs {
+        replacements[re as usize] = replacement;
+        alternates.push((re as char).to_string());
+    }
+
+    let re = regex!(&alternates.join("|"));
+    let mut new = String::with_capacity(text.len());
+    let mut last_match = 0;
+    for m in re.find_iter(text) {
+        new.push_str(&text[last_match..m.start()]);
+        new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
+        last_match = m.end();
+    }
+    new.push_str(&text[last_match..]);
+    new
+}
diff --git a/examples/shootout-regex-dna-single.rs b/examples/shootout-regex-dna-single.rs
new file mode 100644
index 0000000..ab34107
--- /dev/null
+++ b/examples/shootout-regex-dna-single.rs
@@ -0,0 +1,59 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::Regex::new($re).unwrap()
+    };
+}
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    for re in variants {
+        println!("{} {}", re.to_string(), re.find_iter(&seq).count());
+    }
+
+    let substs = vec![
+        (regex!("B"), "(c|g|t)"),
+        (regex!("D"), "(a|g|t)"),
+        (regex!("H"), "(a|c|t)"),
+        (regex!("K"), "(g|t)"),
+        (regex!("M"), "(a|c)"),
+        (regex!("N"), "(a|c|g|t)"),
+        (regex!("R"), "(a|g)"),
+        (regex!("S"), "(c|g)"),
+        (regex!("V"), "(a|c|g)"),
+        (regex!("W"), "(a|t)"),
+        (regex!("Y"), "(c|t)"),
+    ];
+    let mut seq = seq;
+    for (re, replacement) in substs {
+        seq = re.replace_all(&seq, replacement).into_owned();
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs
new file mode 100644
index 0000000..ece7ca5
--- /dev/null
+++ b/examples/shootout-regex-dna.rs
@@ -0,0 +1,70 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::thread;
+
+macro_rules! regex {
+    ($re:expr) => {
+        ::regex::Regex::new($re).unwrap()
+    };
+}
+
+fn main() {
+    let mut seq = String::with_capacity(51 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+    let seq_arc = Arc::new(seq.clone());
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let mut counts = vec![];
+    for variant in variants {
+        let seq = seq_arc.clone();
+        let restr = variant.to_string();
+        let future = thread::spawn(move || variant.find_iter(&seq).count());
+        counts.push((restr, future));
+    }
+
+    let substs = vec![
+        (regex!("B"), "(c|g|t)"),
+        (regex!("D"), "(a|g|t)"),
+        (regex!("H"), "(a|c|t)"),
+        (regex!("K"), "(g|t)"),
+        (regex!("M"), "(a|c)"),
+        (regex!("N"), "(a|c|g|t)"),
+        (regex!("R"), "(a|g)"),
+        (regex!("S"), "(c|g)"),
+        (regex!("V"), "(a|c|g)"),
+        (regex!("W"), "(a|t)"),
+        (regex!("Y"), "(c|t)"),
+    ];
+    let mut seq = seq;
+    for (re, replacement) in substs {
+        seq = re.replace_all(&seq, replacement).into_owned();
+    }
+
+    for (variant, count) in counts {
+        println!("{} {}", variant, count.join().unwrap());
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..aa37a21
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/src/backtrack.rs b/src/backtrack.rs
new file mode 100644
index 0000000..2eaeb72
--- /dev/null
+++ b/src/backtrack.rs
@@ -0,0 +1,288 @@
+// This is the backtracking matching engine. It has the same exact capability
+// as the full NFA simulation, except it is artificially restricted to small
+// regexes on small inputs because of its memory requirements.
+//
+// In particular, this is a *bounded* backtracking engine. It retains worst
+// case linear time by keeping track of the states that it has visited (using a
+// bitmap). Namely, once a state is visited, it is never visited again. Since a
+// state is keyed by `(instruction index, input index)`, we have that its time
+// complexity is `O(mn)` (i.e., linear in the size of the search text).
+//
+// The backtracking engine can beat out the NFA simulation on small
+// regexes/inputs because it doesn't have to keep track of multiple copies of
+// the capture groups. In benchmarks, the backtracking engine is roughly twice
+// as fast as the full NFA simulation. Note though that its performance doesn't
+// scale, even if you're willing to live with the memory requirements. Namely,
+// the bitset has to be zeroed on each execution, which becomes quite expensive
+// on large bitsets.
+
+use exec::ProgramCache;
+use input::{Input, InputAt};
+use prog::{InstPtr, Program};
+use re_trait::Slot;
+
+type Bits = u32;
+
+const BIT_SIZE: usize = 32;
+const MAX_SIZE_BYTES: usize = 256 * (1 << 10); // 256 KB
+
+/// Returns true iff the given regex and input should be executed by this
+/// engine with reasonable memory usage.
+pub fn should_exec(num_insts: usize, text_len: usize) -> bool {
+    // Total memory usage in bytes is determined by:
+    //
+    //   ((len(insts) * (len(input) + 1) + bits - 1) / bits) * (size_of(u32))
+    //
+    // The actual limit picked is pretty much a heuristic.
+    // See: https://github.com/rust-lang/regex/issues/215
+    let size = ((num_insts * (text_len + 1) + BIT_SIZE - 1) / BIT_SIZE) * 4;
+    size <= MAX_SIZE_BYTES
+}
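+
+// For intuition about the bound above (a sketch with hypothetical sizes, not
+// numbers taken from the crate's benchmarks): a 100-instruction program over
+// a 1,000-byte input needs ((100 * 1001 + 31) / 32) * 4 = 12,516 bytes of
+// visited bitmap, so it qualifies, while a 10,000-instruction program over a
+// 100,000-byte input needs roughly 125 MB and is rejected.
+#[cfg(test)]
+mod should_exec_example {
+    #[test]
+    fn bound_is_respected() {
+        assert!(super::should_exec(100, 1_000));
+        assert!(!super::should_exec(10_000, 100_000));
+    }
+}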
+
+/// A backtracking matching engine.
+#[derive(Debug)]
+pub struct Bounded<'a, 'm, 'r, 's, I> {
+    prog: &'r Program,
+    input: I,
+    matches: &'m mut [bool],
+    slots: &'s mut [Slot],
+    m: &'a mut Cache,
+}
+
+/// Shared cached state between multiple invocations of a backtracking engine
+/// in the same thread.
+#[derive(Clone, Debug)]
+pub struct Cache {
+    jobs: Vec<Job>,
+    visited: Vec<Bits>,
+}
+
+impl Cache {
+    /// Create new empty cache for the backtracking engine.
+    pub fn new(_prog: &Program) -> Self {
+        Cache { jobs: vec![], visited: vec![] }
+    }
+}
+
+/// A job is an explicit unit of stack space in the backtracking engine.
+///
+/// The "normal" representation is a single state transition, which corresponds
+/// to an NFA state and a character in the input. However, the backtracking
+/// engine must keep track of old capture group values. We use the explicit
+/// stack to do it.
+#[derive(Clone, Copy, Debug)]
+enum Job {
+    Inst { ip: InstPtr, at: InputAt },
+    SaveRestore { slot: usize, old_pos: Option<usize> },
+}
+
+impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
+    /// Execute the backtracking matching engine.
+    ///
+    /// If there's a match, `exec` returns `true` and populates the given
+    /// captures accordingly.
+    pub fn exec(
+        prog: &'r Program,
+        cache: &ProgramCache,
+        matches: &'m mut [bool],
+        slots: &'s mut [Slot],
+        input: I,
+        start: usize,
+        end: usize,
+    ) -> bool {
+        let mut cache = cache.borrow_mut();
+        let cache = &mut cache.backtrack;
+        let start = input.at(start);
+        let mut b = Bounded {
+            prog: prog,
+            input: input,
+            matches: matches,
+            slots: slots,
+            m: cache,
+        };
+        b.exec_(start, end)
+    }
+
+    /// Clears the cache such that the backtracking engine can be executed
+    /// on some input of fixed length.
+    fn clear(&mut self) {
+        // Reset the job memory so that we start fresh.
+        self.m.jobs.clear();
+
+        // Now we need to clear the bit state set.
+        // We do this by figuring out how much space we need to keep track
+        // of the states we've visited.
+        // Then we reset all existing allocated space to 0.
+        // Finally, we request more space if we need it.
+        //
+        // This is all a little circuitous, but doing this unsafely
+        // doesn't seem to have a measurable impact on performance.
+        // (Probably because backtracking is limited to such small
+        // inputs/regexes in the first place.)
+        let visited_len =
+            (self.prog.len() * (self.input.len() + 1) + BIT_SIZE - 1)
+                / BIT_SIZE;
+        self.m.visited.truncate(visited_len);
+        for v in &mut self.m.visited {
+            *v = 0;
+        }
+        if visited_len > self.m.visited.len() {
+            let len = self.m.visited.len();
+            self.m.visited.reserve_exact(visited_len - len);
+            for _ in 0..(visited_len - len) {
+                self.m.visited.push(0);
+            }
+        }
+    }
+
+    /// Start backtracking at the given position in the input, but also look
+    /// for literal prefixes.
+    fn exec_(&mut self, mut at: InputAt, end: usize) -> bool {
+        self.clear();
+        // If this is an anchored regex at the beginning of the input, then
+        // we're either already done or we only need to try backtracking once.
+        if self.prog.is_anchored_start {
+            return if !at.is_start() { false } else { self.backtrack(at) };
+        }
+        let mut matched = false;
+        loop {
+            if !self.prog.prefixes.is_empty() {
+                at = match self.input.prefix_at(&self.prog.prefixes, at) {
+                    None => break,
+                    Some(at) => at,
+                };
+            }
+            matched = self.backtrack(at) || matched;
+            if matched && self.prog.matches.len() == 1 {
+                return true;
+            }
+            if at.pos() >= end {
+                break;
+            }
+            at = self.input.at(at.next_pos());
+        }
+        matched
+    }
+
+    /// The main backtracking loop starting at the given input position.
+    fn backtrack(&mut self, start: InputAt) -> bool {
+        // N.B. We use an explicit stack to avoid recursion.
+        // To avoid excessive pushing and popping, most transitions are handled
+        // in the `step` helper function, which only pushes to the stack when
+        // there's a capture or a branch.
+        let mut matched = false;
+        self.m.jobs.push(Job::Inst { ip: 0, at: start });
+        while let Some(job) = self.m.jobs.pop() {
+            match job {
+                Job::Inst { ip, at } => {
+                    if self.step(ip, at) {
+                        // Only quit if we're matching one regex.
+                        // If we're matching a regex set, then mush on and
+                        // try to find other matches (if we want them).
+                        if self.prog.matches.len() == 1 {
+                            return true;
+                        }
+                        matched = true;
+                    }
+                }
+                Job::SaveRestore { slot, old_pos } => {
+                    if slot < self.slots.len() {
+                        self.slots[slot] = old_pos;
+                    }
+                }
+            }
+        }
+        matched
+    }
+
+    fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
+        use prog::Inst::*;
+        loop {
+            // This loop is an optimization to avoid constantly pushing/popping
+            // from the stack. Namely, if we're pushing a job only to run it
+            // next, avoid the push and just mutate `ip` (and possibly `at`)
+            // in place.
+            if self.has_visited(ip, at) {
+                return false;
+            }
+            match self.prog[ip] {
+                Match(slot) => {
+                    if slot < self.matches.len() {
+                        self.matches[slot] = true;
+                    }
+                    return true;
+                }
+                Save(ref inst) => {
+                    if let Some(&old_pos) = self.slots.get(inst.slot) {
+                        // If this path doesn't work out, then we save the old
+                        // capture index (if one exists) in an alternate
+                        // job. If the next path fails, then the alternate
+                        // job is popped and the old capture index is restored.
+                        self.m.jobs.push(Job::SaveRestore {
+                            slot: inst.slot,
+                            old_pos: old_pos,
+                        });
+                        self.slots[inst.slot] = Some(at.pos());
+                    }
+                    ip = inst.goto;
+                }
+                Split(ref inst) => {
+                    self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
+                    ip = inst.goto1;
+                }
+                EmptyLook(ref inst) => {
+                    if self.input.is_empty_match(at, inst) {
+                        ip = inst.goto;
+                    } else {
+                        return false;
+                    }
+                }
+                Char(ref inst) => {
+                    if inst.c == at.char() {
+                        ip = inst.goto;
+                        at = self.input.at(at.next_pos());
+                    } else {
+                        return false;
+                    }
+                }
+                Ranges(ref inst) => {
+                    if inst.matches(at.char()) {
+                        ip = inst.goto;
+                        at = self.input.at(at.next_pos());
+                    } else {
+                        return false;
+                    }
+                }
+                Bytes(ref inst) => {
+                    if let Some(b) = at.byte() {
+                        if inst.matches(b) {
+                            ip = inst.goto;
+                            at = self.input.at(at.next_pos());
+                            continue;
+                        }
+                    }
+                    return false;
+                }
+            }
+        }
+    }
+
+    fn has_visited(&mut self, ip: InstPtr, at: InputAt) -> bool {
+        let k = ip * (self.input.len() + 1) + at.pos();
+        let k1 = k / BIT_SIZE;
+        let k2 = usize_to_u32(1 << (k & (BIT_SIZE - 1)));
+        if self.m.visited[k1] & k2 == 0 {
+            self.m.visited[k1] |= k2;
+            false
+        } else {
+            true
+        }
+    }
+}
+
+fn usize_to_u32(n: usize) -> u32 {
+    if (n as u64) > (::std::u32::MAX as u64) {
+        panic!("BUG: {} is too big to fit into u32", n)
+    }
+    n as u32
+}
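+
+// Sketch of the visited-bitmap indexing used by `has_visited` above, with
+// hypothetical sizes (not tied to any real program): a state `(ip, pos)` maps
+// to bit `ip * (text_len + 1) + pos`, which is then split into a word index
+// and a mask, matching the allocation computed in `clear`.
+#[cfg(test)]
+mod visited_index_example {
+    use super::BIT_SIZE;
+
+    #[test]
+    fn state_bits_fit_in_the_allocation() {
+        let (prog_len, text_len) = (3usize, 10usize);
+        let bit = |ip: usize, pos: usize| ip * (text_len + 1) + pos;
+        // Distinct (ip, pos) pairs map to distinct bits...
+        assert_ne!(bit(1, 4), bit(2, 4));
+        // ...and the largest possible bit fits in the number of words that
+        // `clear` would allocate for this program/input size.
+        let words = (prog_len * (text_len + 1) + BIT_SIZE - 1) / BIT_SIZE;
+        assert!(bit(prog_len - 1, text_len) / BIT_SIZE < words);
+    }
+}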
diff --git a/src/cache.rs b/src/cache.rs
new file mode 100644
index 0000000..dbb7e64
--- /dev/null
+++ b/src/cache.rs
@@ -0,0 +1,100 @@
+// This module defines a common API for caching internal runtime state.
+// The `thread_local` crate provides an extremely optimized version of this.
+// However, if the perf-cache feature is disabled, then we drop the
+// thread_local dependency and instead use a pretty naive caching mechanism
+// with a mutex.
+//
+// Strictly speaking, the CachedGuard isn't necessary for the much more
+// flexible thread_local API, but implementing thread_local's API doesn't
+// seem possible in purely safe code.
+
+pub use self::imp::{Cached, CachedGuard};
+
+#[cfg(feature = "perf-cache")]
+mod imp {
+    use thread_local::CachedThreadLocal;
+
+    #[derive(Debug)]
+    pub struct Cached<T: Send>(CachedThreadLocal<T>);
+
+    #[derive(Debug)]
+    pub struct CachedGuard<'a, T: 'a>(&'a T);
+
+    impl<T: Send> Cached<T> {
+        pub fn new() -> Cached<T> {
+            Cached(CachedThreadLocal::new())
+        }
+
+        pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
+            CachedGuard(self.0.get_or(|| create()))
+        }
+    }
+
+    impl<'a, T: Send> CachedGuard<'a, T> {
+        pub fn value(&self) -> &T {
+            self.0
+        }
+    }
+}
+
+#[cfg(not(feature = "perf-cache"))]
+mod imp {
+    use std::marker::PhantomData;
+    use std::panic::UnwindSafe;
+    use std::sync::Mutex;
+
+    #[derive(Debug)]
+    pub struct Cached<T: Send> {
+        stack: Mutex<Vec<T>>,
+        /// When perf-cache is enabled, the thread_local crate is used, and
+        /// its CachedThreadLocal impls Send, Sync and UnwindSafe, but NOT
+        /// RefUnwindSafe. However, a Mutex impls RefUnwindSafe. So in order
+        /// to keep the APIs consistent regardless of whether perf-cache is
+        /// enabled, we force this type to NOT impl RefUnwindSafe too.
+        ///
+        /// Ideally, we should always impl RefUnwindSafe, but it seems a little
+        /// tricky to do that right now.
+        ///
+        /// See also: https://github.com/rust-lang/regex/issues/576
+        _phantom: PhantomData<Box<dyn Send + Sync + UnwindSafe>>,
+    }
+
+    #[derive(Debug)]
+    pub struct CachedGuard<'a, T: 'a + Send> {
+        cache: &'a Cached<T>,
+        value: Option<T>,
+    }
+
+    impl<T: Send> Cached<T> {
+        pub fn new() -> Cached<T> {
+            Cached { stack: Mutex::new(vec![]), _phantom: PhantomData }
+        }
+
+        pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
+            let mut stack = self.stack.lock().unwrap();
+            match stack.pop() {
+                None => CachedGuard { cache: self, value: Some(create()) },
+                Some(value) => CachedGuard { cache: self, value: Some(value) },
+            }
+        }
+
+        fn put(&self, value: T) {
+            let mut stack = self.stack.lock().unwrap();
+            stack.push(value);
+        }
+    }
+
+    impl<'a, T: Send> CachedGuard<'a, T> {
+        pub fn value(&self) -> &T {
+            self.value.as_ref().unwrap()
+        }
+    }
+
+    impl<'a, T: Send> Drop for CachedGuard<'a, T> {
+        fn drop(&mut self) {
+            if let Some(value) = self.value.take() {
+                self.cache.put(value);
+            }
+        }
+    }
+}
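+
+// A minimal usage sketch of the API above (illustrative values only). Both
+// implementations expose the same surface: `get_or` hands out a guard, and a
+// value created once is reused on later calls from the same thread.
+#[cfg(test)]
+mod usage_example {
+    use super::Cached;
+
+    #[test]
+    fn value_is_created_once_and_reused() {
+        let cache: Cached<Vec<u32>> = Cached::new();
+        {
+            let guard = cache.get_or(|| vec![1, 2, 3]);
+            assert_eq!(guard.value(), &vec![1, 2, 3]);
+        } // Dropping the guard returns the value to the mutex-backed pool.
+        // A second request sees the already-created value, so the closure
+        // passed here is never run.
+        let guard = cache.get_or(|| vec![9, 9, 9]);
+        assert_eq!(guard.value(), &vec![1, 2, 3]);
+    }
+}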
diff --git a/src/compile.rs b/src/compile.rs
new file mode 100644
index 0000000..2418400
--- /dev/null
+++ b/src/compile.rs
@@ -0,0 +1,1133 @@
+use std::collections::HashMap;
+use std::iter;
+use std::result;
+use std::sync::Arc;
+
+use syntax::hir::{self, Hir};
+use syntax::is_word_byte;
+use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
+
+use prog::{
+    EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
+    InstSave, InstSplit, Program,
+};
+
+use Error;
+
+type Result = result::Result<Patch, Error>;
+
+#[derive(Debug)]
+struct Patch {
+    hole: Hole,
+    entry: InstPtr,
+}
+
+/// A compiler translates a regular expression AST to a sequence of
+/// instructions. The sequence of instructions represents an NFA.
+pub struct Compiler {
+    insts: Vec<MaybeInst>,
+    compiled: Program,
+    capture_name_idx: HashMap<String, usize>,
+    num_exprs: usize,
+    size_limit: usize,
+    suffix_cache: SuffixCache,
+    utf8_seqs: Option<Utf8Sequences>,
+    byte_classes: ByteClassSet,
+}
+
+impl Compiler {
+    /// Create a new regular expression compiler.
+    ///
+    /// Various options can be set before calling `compile` on an expression.
+    pub fn new() -> Self {
+        Compiler {
+            insts: vec![],
+            compiled: Program::new(),
+            capture_name_idx: HashMap::new(),
+            num_exprs: 0,
+            size_limit: 10 * (1 << 20),
+            suffix_cache: SuffixCache::new(1000),
+            utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
+            byte_classes: ByteClassSet::new(),
+        }
+    }
+
+    /// The size of the resulting program is limited by size_limit. If
+    /// the program approximately exceeds the given size (in bytes), then
+    /// compilation will stop and return an error.
+    pub fn size_limit(mut self, size_limit: usize) -> Self {
+        self.size_limit = size_limit;
+        self
+    }
+
+    /// If bytes is true, then the program is compiled as a byte based
+    /// automaton, which incorporates UTF-8 decoding into the machine. If it's
+    /// false, then the automaton is Unicode scalar value based, e.g., an
+    /// engine utilizing such an automaton is responsible for UTF-8 decoding.
+    ///
+    /// The specific invariant is that when returning a byte based machine,
+    /// neither the `Char` nor `Ranges` instructions are produced.
+    /// Conversely, when producing a Unicode scalar value machine, the `Bytes`
+    /// instruction is never produced.
+    ///
+    /// Note that `dfa(true)` implies `bytes(true)`.
+    pub fn bytes(mut self, yes: bool) -> Self {
+        self.compiled.is_bytes = yes;
+        self
+    }
+
+    /// When disabled, the program compiled may match arbitrary bytes.
+    ///
+    /// When enabled (the default), all compiled programs exclusively match
+    /// valid UTF-8 bytes.
+    pub fn only_utf8(mut self, yes: bool) -> Self {
+        self.compiled.only_utf8 = yes;
+        self
+    }
+
+    /// When set, the machine returned is suitable for use in the DFA matching
+    /// engine.
+    ///
+    /// In particular, this ensures that if the regex is not anchored in the
+    /// beginning, then a preceding `.*?` is included in the program. (The NFA
+    /// based engines handle the preceding `.*?` explicitly, which is difficult
+    /// or impossible in the DFA engine.)
+    pub fn dfa(mut self, yes: bool) -> Self {
+        self.compiled.is_dfa = yes;
+        self
+    }
+
+    /// When set, the machine returned is suitable for matching text in
+    /// reverse. In particular, all concatenations are flipped.
+    pub fn reverse(mut self, yes: bool) -> Self {
+        self.compiled.is_reverse = yes;
+        self
+    }
+
+    /// Compile a regular expression given its AST.
+    ///
+    /// The compiler is guaranteed to succeed unless the program exceeds the
+    /// specified size limit. If the size limit is exceeded, then compilation
+    /// stops and returns an error.
+    pub fn compile(mut self, exprs: &[Hir]) -> result::Result<Program, Error> {
+        debug_assert!(!exprs.is_empty());
+        self.num_exprs = exprs.len();
+        if exprs.len() == 1 {
+            self.compile_one(&exprs[0])
+        } else {
+            self.compile_many(exprs)
+        }
+    }
+
+    fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
+        // If we're compiling a forward DFA and we aren't anchored, then
+        // add a `.*?` before the first capture group.
+        // Other matching engines handle this by baking the logic into the
+        // matching engine itself.
+        let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
+        self.compiled.is_anchored_start = expr.is_anchored_start();
+        self.compiled.is_anchored_end = expr.is_anchored_end();
+        if self.compiled.needs_dotstar() {
+            dotstar_patch = self.c_dotstar()?;
+            self.compiled.start = dotstar_patch.entry;
+        }
+        self.compiled.captures = vec![None];
+        let patch = self.c_capture(0, expr)?;
+        if self.compiled.needs_dotstar() {
+            self.fill(dotstar_patch.hole, patch.entry);
+        } else {
+            self.compiled.start = patch.entry;
+        }
+        self.fill_to_next(patch.hole);
+        self.compiled.matches = vec![self.insts.len()];
+        self.push_compiled(Inst::Match(0));
+        self.compile_finish()
+    }
+
+    fn compile_many(
+        mut self,
+        exprs: &[Hir],
+    ) -> result::Result<Program, Error> {
+        debug_assert!(exprs.len() > 1);
+
+        self.compiled.is_anchored_start =
+            exprs.iter().all(|e| e.is_anchored_start());
+        self.compiled.is_anchored_end =
+            exprs.iter().all(|e| e.is_anchored_end());
+        let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
+        if self.compiled.needs_dotstar() {
+            dotstar_patch = self.c_dotstar()?;
+            self.compiled.start = dotstar_patch.entry;
+        } else {
+            self.compiled.start = 0; // first instruction is always split
+        }
+        self.fill_to_next(dotstar_patch.hole);
+
+        let mut prev_hole = Hole::None;
+        for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
+            self.fill_to_next(prev_hole);
+            let split = self.push_split_hole();
+            let Patch { hole, entry } = self.c_capture(0, expr)?;
+            self.fill_to_next(hole);
+            self.compiled.matches.push(self.insts.len());
+            self.push_compiled(Inst::Match(i));
+            prev_hole = self.fill_split(split, Some(entry), None);
+        }
+        let i = exprs.len() - 1;
+        let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
+        self.fill(prev_hole, entry);
+        self.fill_to_next(hole);
+        self.compiled.matches.push(self.insts.len());
+        self.push_compiled(Inst::Match(i));
+        self.compile_finish()
+    }
+
+    fn compile_finish(mut self) -> result::Result<Program, Error> {
+        self.compiled.insts =
+            self.insts.into_iter().map(|inst| inst.unwrap()).collect();
+        self.compiled.byte_classes = self.byte_classes.byte_classes();
+        self.compiled.capture_name_idx = Arc::new(self.capture_name_idx);
+        Ok(self.compiled)
+    }
+
+    /// Compile expr into self.insts, returning a patch on success,
+    /// or an error if we run out of memory.
+    ///
+    /// All of the c_* methods of the compiler share the contract outlined
+    /// here.
+    ///
+    /// The main thing that a c_* method does is mutate `self.insts`
+    /// to add a list of mostly compiled instructions required to execute
+    /// the given expression. `self.insts` contains MaybeInsts rather than
+    /// Insts because there is some backpatching required.
+    ///
+    /// The `Patch` value returned by each c_* method provides metadata
+    /// about the compiled instructions emitted to `self.insts`. The
+    /// `entry` member of the patch refers to the first instruction
+    /// (the entry point), while the `hole` member contains zero or
+    /// more offsets to partial instructions that need to be backpatched.
+    /// A c_* routine can't know where its list of instructions is going to
+    /// jump to after execution, so it is up to the caller to patch
+    /// these jumps to point to the right place. So, after compiling some
+    /// expression, e, we would end up with a situation that looks like:
+    ///
+    /// ```text
+    /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...]
+    ///                     ^              ^             ^
+    ///                     |                \         /
+    ///                   entry                \     /
+    ///                                         hole
+    /// ```
+    ///
+    /// To compile two expressions, e1 and e2, concatenated together we
+    /// would do:
+    ///
+    /// ```ignore
+    /// let patch1 = self.c(e1);
+    /// let patch2 = self.c(e2);
+    /// ```
+    ///
+    /// which leaves us with a situation that looks like
+    ///
+    /// ```text
+    /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ]
+    ///                     ^        ^            ^        ^
+    ///                     |        |            |        |
+    ///                entry1        hole1   entry2        hole2
+    /// ```
+    ///
+    /// Then to merge the two patches together into one we would backpatch
+    /// hole1 with entry2 and return a new patch that enters at entry1
+    /// and has hole2 for a hole. In fact, if you look at the c_concat
+    /// method you will see that it does exactly this, though it handles
+    /// a list of expressions rather than just the two that we use for
+    /// an example.
+    fn c(&mut self, expr: &Hir) -> Result {
+        use prog;
+        use syntax::hir::HirKind::*;
+
+        self.check_size()?;
+        match *expr.kind() {
+            Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
+            Literal(hir::Literal::Unicode(c)) => self.c_char(c),
+            Literal(hir::Literal::Byte(b)) => {
+                assert!(self.compiled.uses_bytes());
+                self.c_byte(b)
+            }
+            Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()),
+            Class(hir::Class::Bytes(ref cls)) => {
+                if self.compiled.uses_bytes() {
+                    self.c_class_bytes(cls.ranges())
+                } else {
+                    assert!(cls.is_all_ascii());
+                    let mut char_ranges = vec![];
+                    for r in cls.iter() {
+                        let (s, e) = (r.start() as char, r.end() as char);
+                        char_ranges.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.c_class(&char_ranges)
+                }
+            }
+            Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
+                self.byte_classes.set_range(b'\n', b'\n');
+                self.c_empty_look(prog::EmptyLook::EndLine)
+            }
+            Anchor(hir::Anchor::StartLine) => {
+                self.byte_classes.set_range(b'\n', b'\n');
+                self.c_empty_look(prog::EmptyLook::StartLine)
+            }
+            Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
+                self.byte_classes.set_range(b'\n', b'\n');
+                self.c_empty_look(prog::EmptyLook::StartLine)
+            }
+            Anchor(hir::Anchor::EndLine) => {
+                self.byte_classes.set_range(b'\n', b'\n');
+                self.c_empty_look(prog::EmptyLook::EndLine)
+            }
+            Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
+                self.c_empty_look(prog::EmptyLook::EndText)
+            }
+            Anchor(hir::Anchor::StartText) => {
+                self.c_empty_look(prog::EmptyLook::StartText)
+            }
+            Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
+                self.c_empty_look(prog::EmptyLook::StartText)
+            }
+            Anchor(hir::Anchor::EndText) => {
+                self.c_empty_look(prog::EmptyLook::EndText)
+            }
+            WordBoundary(hir::WordBoundary::Unicode) => {
+                if !cfg!(feature = "unicode-perl") {
+                    return Err(Error::Syntax(
+                        "Unicode word boundaries are unavailable when \
+                         the unicode-perl feature is disabled"
+                            .to_string(),
+                    ));
+                }
+                self.compiled.has_unicode_word_boundary = true;
+                self.byte_classes.set_word_boundary();
+                self.c_empty_look(prog::EmptyLook::WordBoundary)
+            }
+            WordBoundary(hir::WordBoundary::UnicodeNegate) => {
+                if !cfg!(feature = "unicode-perl") {
+                    return Err(Error::Syntax(
+                        "Unicode word boundaries are unavailable when \
+                         the unicode-perl feature is disabled"
+                            .to_string(),
+                    ));
+                }
+                self.compiled.has_unicode_word_boundary = true;
+                self.byte_classes.set_word_boundary();
+                self.c_empty_look(prog::EmptyLook::NotWordBoundary)
+            }
+            WordBoundary(hir::WordBoundary::Ascii) => {
+                self.byte_classes.set_word_boundary();
+                self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
+            }
+            WordBoundary(hir::WordBoundary::AsciiNegate) => {
+                self.byte_classes.set_word_boundary();
+                self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
+            }
+            Group(ref g) => match g.kind {
+                hir::GroupKind::NonCapturing => self.c(&g.hir),
+                hir::GroupKind::CaptureIndex(index) => {
+                    if index as usize >= self.compiled.captures.len() {
+                        self.compiled.captures.push(None);
+                    }
+                    self.c_capture(2 * index as usize, &g.hir)
+                }
+                hir::GroupKind::CaptureName { index, ref name } => {
+                    if index as usize >= self.compiled.captures.len() {
+                        let n = name.to_string();
+                        self.compiled.captures.push(Some(n.clone()));
+                        self.capture_name_idx.insert(n, index as usize);
+                    }
+                    self.c_capture(2 * index as usize, &g.hir)
+                }
+            },
+            Concat(ref es) => {
+                if self.compiled.is_reverse {
+                    self.c_concat(es.iter().rev())
+                } else {
+                    self.c_concat(es)
+                }
+            }
+            Alternation(ref es) => self.c_alternate(&**es),
+            Repetition(ref rep) => self.c_repeat(rep),
+        }
+    }
+
+    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
+        if self.num_exprs > 1 || self.compiled.is_dfa {
+            // Don't ever compile Save instructions for regex sets because
+            // they are never used. They are also never used in DFA programs
+            // because DFAs can't handle captures.
+            self.c(expr)
+        } else {
+            let entry = self.insts.len();
+            let hole = self.push_hole(InstHole::Save { slot: first_slot });
+            let patch = self.c(expr)?;
+            self.fill(hole, patch.entry);
+            self.fill_to_next(patch.hole);
+            let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
+            Ok(Patch { hole: hole, entry: entry })
+        }
+    }
+
+    fn c_dotstar(&mut self) -> Result {
+        Ok(if !self.compiled.only_utf8() {
+            self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
+                greedy: false,
+                hir: Box::new(Hir::any(true)),
+            }))?
+        } else {
+            self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
+                greedy: false,
+                hir: Box::new(Hir::any(false)),
+            }))?
+        })
+    }
+
+    fn c_char(&mut self, c: char) -> Result {
+        if self.compiled.uses_bytes() {
+            if c.is_ascii() {
+                let b = c as u8;
+                let hole =
+                    self.push_hole(InstHole::Bytes { start: b, end: b });
+                self.byte_classes.set_range(b, b);
+                Ok(Patch { hole, entry: self.insts.len() - 1 })
+            } else {
+                self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
+            }
+        } else {
+            let hole = self.push_hole(InstHole::Char { c: c });
+            Ok(Patch { hole, entry: self.insts.len() - 1 })
+        }
+    }
+
+    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
+        assert!(!ranges.is_empty());
+        if self.compiled.uses_bytes() {
+            CompileClass { c: self, ranges: ranges }.compile()
+        } else {
+            let ranges: Vec<(char, char)> =
+                ranges.iter().map(|r| (r.start(), r.end())).collect();
+            let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
+                self.push_hole(InstHole::Char { c: ranges[0].0 })
+            } else {
+                self.push_hole(InstHole::Ranges { ranges: ranges })
+            };
+            Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+        }
+    }
+
+    fn c_byte(&mut self, b: u8) -> Result {
+        self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
+    }
+
+    fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
+        debug_assert!(!ranges.is_empty());
+
+        let first_split_entry = self.insts.len();
+        let mut holes = vec![];
+        let mut prev_hole = Hole::None;
+        for r in &ranges[0..ranges.len() - 1] {
+            self.fill_to_next(prev_hole);
+            let split = self.push_split_hole();
+            let next = self.insts.len();
+            self.byte_classes.set_range(r.start(), r.end());
+            holes.push(self.push_hole(InstHole::Bytes {
+                start: r.start(),
+                end: r.end(),
+            }));
+            prev_hole = self.fill_split(split, Some(next), None);
+        }
+        let next = self.insts.len();
+        let r = &ranges[ranges.len() - 1];
+        self.byte_classes.set_range(r.start(), r.end());
+        holes.push(
+            self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
+        );
+        self.fill(prev_hole, next);
+        Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+    }
+
+    fn c_empty_look(&mut self, look: EmptyLook) -> Result {
+        let hole = self.push_hole(InstHole::EmptyLook { look: look });
+        Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+    }
+
+    fn c_concat<'a, I>(&mut self, exprs: I) -> Result
+    where
+        I: IntoIterator<Item = &'a Hir>,
+    {
+        let mut exprs = exprs.into_iter();
+        let first = match exprs.next() {
+            Some(expr) => expr,
+            None => {
+                return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
+            }
+        };
+        let Patch { mut hole, entry } = self.c(first)?;
+        for e in exprs {
+            let p = self.c(e)?;
+            self.fill(hole, p.entry);
+            hole = p.hole;
+        }
+        Ok(Patch { hole: hole, entry: entry })
+    }
+
+    fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
+        debug_assert!(
+            exprs.len() >= 2,
+            "alternates must have at least 2 exprs"
+        );
+
+        // Initial entry point is always the first split.
+        let first_split_entry = self.insts.len();
+
+        // Save up all of the holes from each alternate. They will all get
+        // patched to point to the same location.
+        let mut holes = vec![];
+
+        let mut prev_hole = Hole::None;
+        for e in &exprs[0..exprs.len() - 1] {
+            self.fill_to_next(prev_hole);
+            let split = self.push_split_hole();
+            let prev_entry = self.insts.len();
+            let Patch { hole, entry } = self.c(e)?;
+            if prev_entry == self.insts.len() {
+                // TODO(burntsushi): It is kind of silly that we don't support
+                // empty-subexpressions in alternates, but it is supremely
+                // awkward to support them in the existing compiler
+                // infrastructure. This entire compiler needs to be thrown out
+                // anyway, so don't feel too bad.
+                return Err(Error::Syntax(
+                    "alternations cannot currently contain \
+                     empty sub-expressions"
+                        .to_string(),
+                ));
+            }
+            holes.push(hole);
+            prev_hole = self.fill_split(split, Some(entry), None);
+        }
+        let prev_entry = self.insts.len();
+        let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
+        if prev_entry == self.insts.len() {
+            // TODO(burntsushi): See TODO above.
+            return Err(Error::Syntax(
+                "alternations cannot currently contain \
+                 empty sub-expressions"
+                    .to_string(),
+            ));
+        }
+        holes.push(hole);
+        self.fill(prev_hole, entry);
+        Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+    }
+
+    fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+        use syntax::hir::RepetitionKind::*;
+        match rep.kind {
+            ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
+            ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
+            OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy),
+            Range(hir::RepetitionRange::Exactly(min_max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max)
+            }
+            Range(hir::RepetitionRange::AtLeast(min)) => {
+                self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min)
+            }
+            Range(hir::RepetitionRange::Bounded(min, max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min, max)
+            }
+        }
+    }
+
+    fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
+        let split_entry = self.insts.len();
+        let split = self.push_split_hole();
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+
+        let split_hole = if greedy {
+            self.fill_split(split, Some(entry_rep), None)
+        } else {
+            self.fill_split(split, None, Some(entry_rep))
+        };
+        let holes = vec![hole_rep, split_hole];
+        Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
+    }
+
+    fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+        let split_entry = self.insts.len();
+        let split = self.push_split_hole();
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+
+        self.fill(hole_rep, split_entry);
+        let split_hole = if greedy {
+            self.fill_split(split, Some(entry_rep), None)
+        } else {
+            self.fill_split(split, None, Some(entry_rep))
+        };
+        Ok(Patch { hole: split_hole, entry: split_entry })
+    }
+
+    fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+        self.fill_to_next(hole_rep);
+        let split = self.push_split_hole();
+
+        let split_hole = if greedy {
+            self.fill_split(split, Some(entry_rep), None)
+        } else {
+            self.fill_split(split, None, Some(entry_rep))
+        };
+        Ok(Patch { hole: split_hole, entry: entry_rep })
+    }
+
+    fn c_repeat_range_min_or_more(
+        &mut self,
+        expr: &Hir,
+        greedy: bool,
+        min: u32,
+    ) -> Result {
+        let min = u32_to_usize(min);
+        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
+        let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
+        self.fill(patch_concat.hole, patch_rep.entry);
+        Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
+    }
+
+    fn c_repeat_range(
+        &mut self,
+        expr: &Hir,
+        greedy: bool,
+        min: u32,
+        max: u32,
+    ) -> Result {
+        let (min, max) = (u32_to_usize(min), u32_to_usize(max));
+        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
+        let initial_entry = patch_concat.entry;
+        if min == max {
+            return Ok(patch_concat);
+        }
+        // It is much simpler to compile, e.g., `a{2,5}` as:
+        //
+        //     aaa?a?a?
+        //
+        // But you end up with a sequence of instructions like this:
+        //
+        //     0: 'a'
+        //     1: 'a',
+        //     2: split(3, 4)
+        //     3: 'a'
+        //     4: split(5, 6)
+        //     5: 'a'
+        //     6: split(7, 8)
+        //     7: 'a'
+        //     8: MATCH
+        //
+        // This is *incredibly* inefficient because the splits end
+        // up forming a chain, which has to be resolved every time a
+        // transition is followed.
+        let mut holes = vec![];
+        let mut prev_hole = patch_concat.hole;
+        for _ in min..max {
+            self.fill_to_next(prev_hole);
+            let split = self.push_split_hole();
+            let Patch { hole, entry } = self.c(expr)?;
+            prev_hole = hole;
+            if greedy {
+                holes.push(self.fill_split(split, Some(entry), None));
+            } else {
+                holes.push(self.fill_split(split, None, Some(entry)));
+            }
+        }
+        holes.push(prev_hole);
+        Ok(Patch { hole: Hole::Many(holes), entry: initial_entry })
+    }
+
+    fn fill(&mut self, hole: Hole, goto: InstPtr) {
+        match hole {
+            Hole::None => {}
+            Hole::One(pc) => {
+                self.insts[pc].fill(goto);
+            }
+            Hole::Many(holes) => {
+                for hole in holes {
+                    self.fill(hole, goto);
+                }
+            }
+        }
+    }
+
+    fn fill_to_next(&mut self, hole: Hole) {
+        let next = self.insts.len();
+        self.fill(hole, next);
+    }
+
+    fn fill_split(
+        &mut self,
+        hole: Hole,
+        goto1: Option<InstPtr>,
+        goto2: Option<InstPtr>,
+    ) -> Hole {
+        match hole {
+            Hole::None => Hole::None,
+            Hole::One(pc) => match (goto1, goto2) {
+                (Some(goto1), Some(goto2)) => {
+                    self.insts[pc].fill_split(goto1, goto2);
+                    Hole::None
+                }
+                (Some(goto1), None) => {
+                    self.insts[pc].half_fill_split_goto1(goto1);
+                    Hole::One(pc)
+                }
+                (None, Some(goto2)) => {
+                    self.insts[pc].half_fill_split_goto2(goto2);
+                    Hole::One(pc)
+                }
+                (None, None) => unreachable!(
+                    "at least one of the split \
+                     holes must be filled"
+                ),
+            },
+            Hole::Many(holes) => {
+                let mut new_holes = vec![];
+                for hole in holes {
+                    new_holes.push(self.fill_split(hole, goto1, goto2));
+                }
+                if new_holes.is_empty() {
+                    Hole::None
+                } else if new_holes.len() == 1 {
+                    new_holes.pop().unwrap()
+                } else {
+                    Hole::Many(new_holes)
+                }
+            }
+        }
+    }
+
+    fn push_compiled(&mut self, inst: Inst) {
+        self.insts.push(MaybeInst::Compiled(inst));
+    }
+
+    fn push_hole(&mut self, inst: InstHole) -> Hole {
+        let hole = self.insts.len();
+        self.insts.push(MaybeInst::Uncompiled(inst));
+        Hole::One(hole)
+    }
+
+    fn push_split_hole(&mut self) -> Hole {
+        let hole = self.insts.len();
+        self.insts.push(MaybeInst::Split);
+        Hole::One(hole)
+    }
+
+    fn check_size(&self) -> result::Result<(), Error> {
+        use std::mem::size_of;
+
+        if self.insts.len() * size_of::<Inst>() > self.size_limit {
+            Err(Error::CompiledTooBig(self.size_limit))
+        } else {
+            Ok(())
+        }
+    }
+}
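+
+// Minimal illustration of the `size_limit` contract documented above: with an
+// absurdly small limit, compilation bails out with an error instead of
+// producing a program. The pattern and limit are arbitrary illustrative
+// values, not defaults used anywhere in the crate.
+#[cfg(test)]
+mod size_limit_example {
+    use super::Compiler;
+
+    #[test]
+    fn tiny_size_limit_is_rejected() {
+        let hir = ::syntax::Parser::new().parse("a+").unwrap();
+        assert!(Compiler::new().size_limit(1).compile(&[hir]).is_err());
+    }
+}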
+
+#[derive(Debug)]
+enum Hole {
+    None,
+    One(InstPtr),
+    Many(Vec<Hole>),
+}
+
+#[derive(Clone, Debug)]
+enum MaybeInst {
+    Compiled(Inst),
+    Uncompiled(InstHole),
+    Split,
+    Split1(InstPtr),
+    Split2(InstPtr),
+}
+
+impl MaybeInst {
+    fn fill(&mut self, goto: InstPtr) {
+        let filled = match *self {
+            MaybeInst::Uncompiled(ref inst) => inst.fill(goto),
+            MaybeInst::Split1(goto1) => {
+                Inst::Split(InstSplit { goto1: goto1, goto2: goto })
+            }
+            MaybeInst::Split2(goto2) => {
+                Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
+            }
+            _ => unreachable!(
+                "not all instructions were compiled! \
+                 found uncompiled instruction: {:?}",
+                self
+            ),
+        };
+        *self = MaybeInst::Compiled(filled);
+    }
+
+    fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
+        let filled = match *self {
+            MaybeInst::Split => {
+                Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
+            }
+            _ => unreachable!(
+                "must be called on Split instruction, \
+                 instead it was called on: {:?}",
+                self
+            ),
+        };
+        *self = MaybeInst::Compiled(filled);
+    }
+
+    fn half_fill_split_goto1(&mut self, goto1: InstPtr) {
+        let half_filled = match *self {
+            MaybeInst::Split => goto1,
+            _ => unreachable!(
+                "must be called on Split instruction, \
+                 instead it was called on: {:?}",
+                self
+            ),
+        };
+        *self = MaybeInst::Split1(half_filled);
+    }
+
+    fn half_fill_split_goto2(&mut self, goto2: InstPtr) {
+        let half_filled = match *self {
+            MaybeInst::Split => goto2,
+            _ => unreachable!(
+                "must be called on Split instruction, \
+                 instead it was called on: {:?}",
+                self
+            ),
+        };
+        *self = MaybeInst::Split2(half_filled);
+    }
+
+    fn unwrap(self) -> Inst {
+        match self {
+            MaybeInst::Compiled(inst) => inst,
+            _ => unreachable!(
+                "must be called on a compiled instruction, \
+                 instead it was called on: {:?}",
+                self
+            ),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+enum InstHole {
+    Save { slot: usize },
+    EmptyLook { look: EmptyLook },
+    Char { c: char },
+    Ranges { ranges: Vec<(char, char)> },
+    Bytes { start: u8, end: u8 },
+}
+
+impl InstHole {
+    fn fill(&self, goto: InstPtr) -> Inst {
+        match *self {
+            InstHole::Save { slot } => {
+                Inst::Save(InstSave { goto: goto, slot: slot })
+            }
+            InstHole::EmptyLook { look } => {
+                Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
+            }
+            InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
+            InstHole::Ranges { ref ranges } => {
+                Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
+            }
+            InstHole::Bytes { start, end } => {
+                Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
+            }
+        }
+    }
+}
+
+struct CompileClass<'a, 'b> {
+    c: &'a mut Compiler,
+    ranges: &'b [hir::ClassUnicodeRange],
+}
+
+impl<'a, 'b> CompileClass<'a, 'b> {
+    fn compile(mut self) -> Result {
+        let mut holes = vec![];
+        let mut initial_entry = None;
+        let mut last_split = Hole::None;
+        let mut utf8_seqs = self.c.utf8_seqs.take().unwrap();
+        self.c.suffix_cache.clear();
+
+        for (i, range) in self.ranges.iter().enumerate() {
+            let is_last_range = i + 1 == self.ranges.len();
+            utf8_seqs.reset(range.start(), range.end());
+            let mut it = (&mut utf8_seqs).peekable();
+            loop {
+                let utf8_seq = match it.next() {
+                    None => break,
+                    Some(utf8_seq) => utf8_seq,
+                };
+                if is_last_range && it.peek().is_none() {
+                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
+                    holes.push(hole);
+                    self.c.fill(last_split, entry);
+                    last_split = Hole::None;
+                    if initial_entry.is_none() {
+                        initial_entry = Some(entry);
+                    }
+                } else {
+                    if initial_entry.is_none() {
+                        initial_entry = Some(self.c.insts.len());
+                    }
+                    self.c.fill_to_next(last_split);
+                    last_split = self.c.push_split_hole();
+                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
+                    holes.push(hole);
+                    last_split =
+                        self.c.fill_split(last_split, Some(entry), None);
+                }
+            }
+        }
+        self.c.utf8_seqs = Some(utf8_seqs);
+        Ok(Patch { hole: Hole::Many(holes), entry: initial_entry.unwrap() })
+    }
+
+    fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result {
+        if self.c.compiled.is_reverse {
+            self.c_utf8_seq_(seq)
+        } else {
+            self.c_utf8_seq_(seq.into_iter().rev())
+        }
+    }
+
+    fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result
+    where
+        I: IntoIterator<Item = &'r Utf8Range>,
+    {
+        // The initial instruction for each UTF-8 sequence should be the same.
+        let mut from_inst = ::std::usize::MAX;
+        let mut last_hole = Hole::None;
+        for byte_range in seq {
+            let key = SuffixCacheKey {
+                from_inst: from_inst,
+                start: byte_range.start,
+                end: byte_range.end,
+            };
+            {
+                let pc = self.c.insts.len();
+                if let Some(cached_pc) = self.c.suffix_cache.get(key, pc) {
+                    from_inst = cached_pc;
+                    continue;
+                }
+            }
+            self.c.byte_classes.set_range(byte_range.start, byte_range.end);
+            if from_inst == ::std::usize::MAX {
+                last_hole = self.c.push_hole(InstHole::Bytes {
+                    start: byte_range.start,
+                    end: byte_range.end,
+                });
+            } else {
+                self.c.push_compiled(Inst::Bytes(InstBytes {
+                    goto: from_inst,
+                    start: byte_range.start,
+                    end: byte_range.end,
+                }));
+            }
+            from_inst = self.c.insts.len().checked_sub(1).unwrap();
+            debug_assert!(from_inst < ::std::usize::MAX);
+        }
+        debug_assert!(from_inst < ::std::usize::MAX);
+        Ok(Patch { hole: last_hole, entry: from_inst })
+    }
+}
+
+/// `SuffixCache` is a simple bounded hash map for caching suffix entries in
+/// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}.
+/// The set of byte ranges looks like this:
+///
+/// [0-7F]
+/// [C2-DF][80-BF]
+/// [E0][A0-BF][80-BF]
+/// [E1-EC][80-BF][80-BF]
+/// [ED][80-9F][80-BF]
+/// [EE-EF][80-BF][80-BF]
+///
+/// Each line above translates to one alternate in the compiled regex program.
+/// However, all but one of the alternates end in the same suffix, which is
+/// a waste of an instruction. The suffix cache facilitates reusing them across
+/// alternates.
+///
+/// Note that a HashMap could be trivially used for this, but we don't need its
+/// overhead. Some small bounded space (LRU style) is more than enough.
+///
+/// This uses a similar idea to [`SparseSet`](../sparse/struct.SparseSet.html),
+/// except it uses hashes as original indices and then compares full keys for
+/// validation against the `dense` array.
+struct SuffixCache {
+    sparse: Box<[usize]>,
+    dense: Vec<SuffixCacheEntry>,
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
+struct SuffixCacheEntry {
+    key: SuffixCacheKey,
+    pc: InstPtr,
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
+struct SuffixCacheKey {
+    from_inst: InstPtr,
+    start: u8,
+    end: u8,
+}
+
+impl SuffixCache {
+    fn new(size: usize) -> Self {
+        SuffixCache {
+            sparse: vec![0usize; size].into(),
+            dense: Vec::with_capacity(size),
+        }
+    }
+
+    fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option<InstPtr> {
+        let hash = self.hash(&key);
+        let pos = &mut self.sparse[hash];
+        if let Some(entry) = self.dense.get(*pos) {
+            if entry.key == key {
+                return Some(entry.pc);
+            }
+        }
+        *pos = self.dense.len();
+        self.dense.push(SuffixCacheEntry { key: key, pc: pc });
+        None
+    }
+
+    fn clear(&mut self) {
+        self.dense.clear();
+    }
+
+    fn hash(&self, suffix: &SuffixCacheKey) -> usize {
+        // Basic FNV-1a hash as described:
+        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+        const FNV_PRIME: u64 = 1099511628211;
+        let mut h = 14695981039346656037;
+        h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
+        h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
+        h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
+        (h as usize) % self.sparse.len()
+    }
+}
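+
+// Minimal illustration of the cache contract described above (hypothetical
+// key values): the first lookup for a key misses and records the caller's
+// `pc`; a later lookup with the same key returns that `pc` so the suffix
+// instruction can be shared between alternates.
+#[cfg(test)]
+mod suffix_cache_example {
+    use super::{SuffixCache, SuffixCacheKey};
+
+    #[test]
+    fn records_then_reuses() {
+        let mut cache = SuffixCache::new(16);
+        let key = SuffixCacheKey { from_inst: 5, start: 0x80, end: 0xBF };
+        assert_eq!(cache.get(key, 42), None);
+        assert_eq!(cache.get(key, 99), Some(42));
+    }
+}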
+
+struct ByteClassSet([bool; 256]);
+
+impl ByteClassSet {
+    fn new() -> Self {
+        ByteClassSet([false; 256])
+    }
+
+    fn set_range(&mut self, start: u8, end: u8) {
+        debug_assert!(start <= end);
+        if start > 0 {
+            self.0[start as usize - 1] = true;
+        }
+        self.0[end as usize] = true;
+    }
+
+    fn set_word_boundary(&mut self) {
+        // We need to mark all ranges of bytes whose pairs result in
+        // evaluating \b differently.
+        let iswb = is_word_byte;
+        let mut b1: u16 = 0;
+        let mut b2: u16;
+        while b1 <= 255 {
+            b2 = b1 + 1;
+            while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) {
+                b2 += 1;
+            }
+            self.set_range(b1 as u8, (b2 - 1) as u8);
+            b1 = b2;
+        }
+    }
+
+    fn byte_classes(&self) -> Vec<u8> {
+        // N.B. If you're debugging the DFA, it's useful to simply return
+        // `(0..256).collect()`, which effectively removes the byte classes
+        // and makes the transitions easier to read.
+        // (0usize..256).map(|x| x as u8).collect()
+        let mut byte_classes = vec![0; 256];
+        let mut class = 0u8;
+        let mut i = 0;
+        loop {
+            byte_classes[i] = class as u8;
+            if i >= 255 {
+                break;
+            }
+            if self.0[i] {
+                class = class.checked_add(1).unwrap();
+            }
+            i += 1;
+        }
+        byte_classes
+    }
+}
+
+fn u32_to_usize(n: u32) -> usize {
+    // In case usize is less than 32 bits, we need to guard against overflow.
+    // On most platforms this compiles to nothing.
+    // TODO Use `std::convert::TryFrom` once it's stable.
+    if (n as u64) > (::std::usize::MAX as u64) {
+        panic!("BUG: {} is too big to be pointer sized", n)
+    }
+    n as usize
+}
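+
+// A sketch of what the TODO above could look like once `TryFrom` is usable at
+// the crate's minimum supported Rust version (illustrative only; this test
+// module is not part of the crate and simply mirrors `u32_to_usize`).
+#[cfg(test)]
+mod u32_to_usize_sketch {
+    #[test]
+    fn try_from_matches_the_manual_check() {
+        use std::convert::TryFrom;
+        let n: u32 = 1 << 20;
+        let converted =
+            usize::try_from(n).expect("BUG: u32 does not fit in usize");
+        assert_eq!(converted, super::u32_to_usize(n));
+    }
+}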
+
+#[cfg(test)]
+mod tests {
+    use super::ByteClassSet;
+
+    #[test]
+    fn byte_classes() {
+        let mut set = ByteClassSet::new();
+        set.set_range(b'a', b'z');
+        let classes = set.byte_classes();
+        assert_eq!(classes[0], 0);
+        assert_eq!(classes[1], 0);
+        assert_eq!(classes[2], 0);
+        assert_eq!(classes[b'a' as usize - 1], 0);
+        assert_eq!(classes[b'a' as usize], 1);
+        assert_eq!(classes[b'm' as usize], 1);
+        assert_eq!(classes[b'z' as usize], 1);
+        assert_eq!(classes[b'z' as usize + 1], 2);
+        assert_eq!(classes[254], 2);
+        assert_eq!(classes[255], 2);
+
+        let mut set = ByteClassSet::new();
+        set.set_range(0, 2);
+        set.set_range(4, 6);
+        let classes = set.byte_classes();
+        assert_eq!(classes[0], 0);
+        assert_eq!(classes[1], 0);
+        assert_eq!(classes[2], 0);
+        assert_eq!(classes[3], 1);
+        assert_eq!(classes[4], 2);
+        assert_eq!(classes[5], 2);
+        assert_eq!(classes[6], 2);
+        assert_eq!(classes[7], 3);
+        assert_eq!(classes[255], 3);
+    }
+
+    #[test]
+    fn full_byte_classes() {
+        let mut set = ByteClassSet::new();
+        for i in 0..256u16 {
+            set.set_range(i as u8, i as u8);
+        }
+        assert_eq!(set.byte_classes().len(), 256);
+    }
+}
diff --git a/src/dfa.rs b/src/dfa.rs
new file mode 100644
index 0000000..decc3b9
--- /dev/null
+++ b/src/dfa.rs
@@ -0,0 +1,1942 @@
+/*!
+The DFA matching engine.
+
+A DFA provides faster matching because the engine is in exactly one state at
+any point in time. In the NFA, there may be multiple active states, and
+considerable CPU cycles are spent shuffling them around. In finite automata
+speak, the DFA follows epsilon transitions in the regex far less than the NFA.
+
+A DFA is a classic trade off between time and space. The NFA is slower, but
+its memory requirements are typically small and predictable. The DFA is faster,
+but given the right regex and the right input, the number of states in the
+DFA can grow exponentially. To mitigate this space problem, we do two things:
+
+1. We implement an *online* DFA. That is, the DFA is constructed from the NFA
+   during a search. When a new state is computed, it is stored in a cache so
+   that it may be reused. An important consequence of this implementation
+   is that states that are never reached for a particular input are never
+   computed. (This is impossible in an "offline" DFA which needs to compute
+   all possible states up front.)
+2. If the cache gets too big, we wipe it and continue matching.
+
+In pathological cases, a new state can be created for every byte of input.
+(e.g., The regex `(a|b)*a(a|b){20}` on a long sequence of a's and b's.)
+In this case, performance regresses to slightly slower than the full NFA
+simulation, in large part because the cache becomes useless. If the cache
+is wiped too frequently, the DFA quits and control falls back to one of the
+NFA simulations.
+
+Because of the "lazy" nature of this DFA, the inner matching loop is
+considerably more complex than one might expect out of a DFA. A number of
+tricks are employed to make it fast. Tread carefully.
+
+N.B. While this implementation is heavily commented, Russ Cox's series of
+articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/
+(As is the DFA implementation in RE2, which heavily influenced this
+implementation.)
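+
+A rough sketch of the lazy transition step described above (illustrative
+pseudocode only; the names are made up and this is not the actual inner
+loop):
+
+```ignore
+let mut si = start_state;
+for &b in text {
+    si = match cache.next(si, byte_classes[b as usize]) {
+        STATE_UNKNOWN => compute_from_nfa_and_cache(si, b)?, // slow path
+        next => next,                                        // cached
+    };
+}
+```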
+*/
+
+use std::collections::HashMap;
+use std::fmt;
+use std::iter::repeat;
+use std::mem;
+use std::sync::Arc;
+
+use exec::ProgramCache;
+use prog::{Inst, Program};
+use sparse::SparseSet;
+
+/// Return true if and only if the given program can be executed by a DFA.
+///
+/// Generally, a DFA is always possible. A pathological case where it is not
+/// possible is if the number of NFA states exceeds `i32::MAX` (we use 32 bit
+/// signed instruction pointer deltas), in which case this function will
+/// return false.
+///
+/// This function will also return false if the given program has any Unicode
+/// instructions (Char or Ranges) since the DFA operates on bytes only.
+pub fn can_exec(insts: &Program) -> bool {
+    use prog::Inst::*;
+    // If for some reason we manage to allocate a regex program with more
+    // than i32::MAX instructions, then we can't execute the DFA because we
+    // use 32 bit instruction pointer deltas for memory savings.
+    // If i32::MAX is the largest positive delta,
+    // then -i32::MAX == i32::MIN + 1 is the largest negative delta,
+    // and we are OK to use 32 bits.
+    if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize {
+        return false;
+    }
+    for inst in insts {
+        match *inst {
+            Char(_) | Ranges(_) => return false,
+            EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {}
+        }
+    }
+    true
+}
+
+/// A reusable cache of DFA states.
+///
+/// This cache is reused between multiple invocations of the same regex
+/// program. (It is not shared simultaneously between threads. If there is
+/// contention, then new caches are created.)
+#[derive(Debug)]
+pub struct Cache {
+    /// Group persistent DFA related cache state together. The sparse sets
+    /// listed below are used as scratch space while computing uncached states.
+    inner: CacheInner,
+    /// qcur and qnext are ordered sets with constant time
+    /// addition/membership/clearing-whole-set and linear time iteration. They
+    /// are used to manage the sets of NFA states in DFA states when computing
+    /// cached DFA states. In particular, the order of the NFA states matters
+    /// for leftmost-first style matching. Namely, when computing a cached
+    /// state, the set of NFA states stops growing as soon as the first Match
+    /// instruction is observed.
+    qcur: SparseSet,
+    qnext: SparseSet,
+}
+
+/// `CacheInner` is logically just a part of Cache, but groups together fields
+/// that aren't passed as function parameters throughout search. (This split
+/// is mostly an artifact of the borrow checker. It is happily paid.)
+#[derive(Debug)]
+struct CacheInner {
+    /// A cache of pre-compiled DFA states, keyed by the set of NFA states
+    /// and the set of empty-width flags set at the byte in the input when the
+    /// state was observed.
+    ///
+    /// A StatePtr is effectively a `*State`, but to avoid various inconvenient
+    /// things, we just pass indexes around manually. The performance impact of
+    /// this is probably an instruction or two in the inner loop. However, on
+    /// 64 bit, each StatePtr is half the size of a *State.
+    compiled: StateMap,
+    /// The transition table.
+    ///
+    /// The transition table is laid out in row-major order, where states are
+    /// rows and the transitions for each state are columns. At a high level,
+    /// given state `s` and byte `b`, the next state can be found at index
+    /// `s * 256 + b`.
+    ///
+    /// This is, of course, a lie. A StatePtr is actually a pointer to the
+    /// *start* of a row in this table. When indexing in the DFA's inner loop,
+    /// this removes the need to multiply the StatePtr by the stride. Yes, it
+    /// matters. This reduces the number of states we can store, but: the
+    /// stride is rarely 256 since we define transitions in terms of
+    /// *equivalence classes* of bytes. Each class corresponds to a set of
+    /// bytes that never discriminate a distinct path through the DFA from each
+    /// other.
+    trans: Transitions,
+    /// A set of cached start states, which are limited to the number of
+    /// permutations of flags set just before the initial byte of input. (The
+    /// index into this vec is an `EmptyFlags`.)
+    ///
+    /// N.B. A start state can be "dead" (i.e., no possible match), so we
+    /// represent it with a StatePtr.
+    start_states: Vec<StatePtr>,
+    /// Stack scratch space used to follow epsilon transitions in the NFA.
+    /// (This permits us to avoid recursion.)
+    ///
+    /// The maximum stack size is the number of NFA states.
+    stack: Vec<InstPtr>,
+    /// The total number of times this cache has been flushed by the DFA
+    /// because of space constraints.
+    flush_count: u64,
+    /// The total heap size of the DFA's cache. We use this to determine when
+    /// we should flush the cache.
+    size: usize,
+    /// Scratch space used when building instruction pointer lists for new
+    /// states. This helps amortize allocation.
+    insts_scratch_space: Vec<u8>,
+}
+
+/// The transition table.
+///
+/// It is laid out in row-major order, with states as rows and byte class
+/// transitions as columns.
+///
+/// The transition table is responsible for producing valid `StatePtrs`. A
+/// `StatePtr` points to the start of a particular row in this table. When
+/// indexing to find the next state this allows us to avoid a multiplication
+/// when computing an index into the table.
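+///
+/// A sketch of the lookup this layout enables (illustrative only; the hot
+/// loop uses an unchecked variant of this):
+///
+/// ```ignore
+/// // `si` already encodes `row * stride`, so no multiply is needed:
+/// let next = self.table[si as usize + byte_class];
+/// ```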
+#[derive(Clone)]
+struct Transitions {
+    /// The table.
+    table: Vec<StatePtr>,
+    /// The stride.
+    num_byte_classes: usize,
+}
+
+/// Fsm encapsulates the actual execution of the DFA.
+#[derive(Debug)]
+pub struct Fsm<'a> {
+    /// prog contains the NFA instruction opcodes. DFA execution uses either
+    /// the `dfa` instructions or the `dfa_reverse` instructions from
+    /// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have
+    /// Unicode opcodes that cannot be executed by the DFA.)
+    prog: &'a Program,
+    /// The start state. We record it here because the pointer may change
+    /// when the cache is wiped.
+    start: StatePtr,
+    /// The current position in the input.
+    at: usize,
+    /// Should we quit after seeing the first match? e.g., When the caller
+    /// uses `is_match` or `shortest_match`.
+    quit_after_match: bool,
+    /// The last state that matched.
+    ///
+    /// When no match has occurred, this is set to STATE_UNKNOWN.
+    ///
+    /// This is only useful when matching regex sets. The last match state
+    /// is useful because it contains all of the match instructions seen,
+    /// thereby allowing us to enumerate which regexes in the set matched.
+    last_match_si: StatePtr,
+    /// The input position of the last cache flush. We use this to determine
+    /// if we're thrashing in the cache too often. If so, the DFA quits so
+    /// that we can fall back to the NFA algorithm.
+    last_cache_flush: usize,
+    /// All cached DFA information that is persisted between searches.
+    cache: &'a mut CacheInner,
+}
+
+/// The result of running the DFA.
+///
+/// Generally, the result is either a match or not a match, but sometimes the
+/// DFA runs too slowly because the cache size is too small. In that case, it
+/// gives up with the intent of falling back to the NFA algorithm.
+///
+/// The DFA can also give up if it runs out of room to create new states, or if
+/// it sees non-ASCII bytes in the presence of a Unicode word boundary.
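+///
+/// Illustrative handling of a `Result` by a caller (sketch only):
+///
+/// ```ignore
+/// match res {
+///     Result::Match(end) => { /* use the match offset */ }
+///     Result::NoMatch(_) => { /* definitely no match */ }
+///     Result::Quit => { /* fall back to an NFA engine */ }
+/// }
+/// ```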
+#[derive(Clone, Debug)]
+pub enum Result<T> {
+    Match(T),
+    NoMatch(usize),
+    Quit,
+}
+
+impl<T> Result<T> {
+    /// Returns true if this result corresponds to a match.
+    pub fn is_match(&self) -> bool {
+        match *self {
+            Result::Match(_) => true,
+            Result::NoMatch(_) | Result::Quit => false,
+        }
+    }
+
+    /// Maps the given function onto T and returns the result.
+    ///
+    /// If this isn't a match, then this is a no-op.
+    #[cfg(feature = "perf-literal")]
+    pub fn map<U, F: FnMut(T) -> U>(self, mut f: F) -> Result<U> {
+        match self {
+            Result::Match(t) => Result::Match(f(t)),
+            Result::NoMatch(x) => Result::NoMatch(x),
+            Result::Quit => Result::Quit,
+        }
+    }
+
+    /// Sets the non-match position.
+    ///
+    /// If this isn't a non-match, then this is a no-op.
+    fn set_non_match(self, at: usize) -> Result<T> {
+        match self {
+            Result::NoMatch(_) => Result::NoMatch(at),
+            r => r,
+        }
+    }
+}
+
+/// `State` is a DFA state. It contains an ordered set of NFA states (not
+/// necessarily complete) and a smattering of flags.
+///
+/// The flags are packed into the first byte of data.
+///
+/// States don't carry their transitions. Instead, transitions are stored in
+/// a single row-major table.
+///
+/// Delta encoding is used to store the instruction pointers.
+/// The first instruction pointer is stored directly starting
+/// at data[1], and each following pointer is stored as an offset
+/// to the previous one. If a delta is in the range -127..127,
+/// it is packed into a single byte; otherwise the byte 128 (-128 as an i8)
+/// is coded as a flag, followed by 4 bytes encoding the delta.
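+///
+/// For example (illustrative), the instruction pointers `[5, 7, 300]` are
+/// stored as the deltas `[5, 2, 293]`: the first two deltas fit in one byte
+/// each, while `293` is written as the flag byte followed by four bytes.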
+#[derive(Clone, Eq, Hash, PartialEq)]
+struct State {
+    data: Arc<[u8]>,
+}
+
+/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
+/// an NFA state).
+///
+/// Throughout this library, this is usually set to `usize`, but we force a
+/// `u32` here for the DFA to save on space.
+type InstPtr = u32;
+
+/// Adds ip to data using delta encoding with respect to prev.
+///
+/// After completion, `data` will contain `ip` and `prev` will be set to `ip`.
+fn push_inst_ptr(data: &mut Vec<u8>, prev: &mut InstPtr, ip: InstPtr) {
+    let delta = (ip as i32) - (*prev as i32);
+    write_vari32(data, delta);
+    *prev = ip;
+}
+
+struct InstPtrs<'a> {
+    base: usize,
+    data: &'a [u8],
+}
+
+impl<'a> Iterator for InstPtrs<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        if self.data.is_empty() {
+            return None;
+        }
+        let (delta, nread) = read_vari32(self.data);
+        let base = self.base as i32 + delta;
+        debug_assert!(base >= 0);
+        debug_assert!(nread > 0);
+        self.data = &self.data[nread..];
+        self.base = base as usize;
+        Some(self.base)
+    }
+}
+
+impl State {
+    fn flags(&self) -> StateFlags {
+        StateFlags(self.data[0])
+    }
+
+    fn inst_ptrs(&self) -> InstPtrs {
+        InstPtrs { base: 0, data: &self.data[1..] }
+    }
+}
+
+/// `StatePtr` is a 32 bit pointer to the start of a row in the transition
+/// table.
+///
+/// It has many special values. There are two types of special values:
+/// sentinels and flags.
+///
+/// Sentinels correspond to special states that carry some kind of
+/// significance. There are three such states: unknown, dead and quit states.
+///
+/// Unknown states are states that haven't been computed yet. They indicate
+/// that a transition should be filled in that points to either an existing
+/// cached state or a new state altogether. In general, an unknown state means
+/// "follow the NFA's epsilon transitions."
+///
+/// Dead states are states that can never lead to a match, no matter what
+/// subsequent input is observed. This means that the DFA should quit
+/// immediately and return the longest match it has found thus far.
+///
+/// Quit states are states that imply the DFA is not capable of matching the
+/// regex correctly. Currently, this is only used when a Unicode word boundary
+/// exists in the regex *and* a non-ASCII byte is observed.
+///
+/// The other type of state pointer is a state pointer with special flag bits.
+/// There are two flags: a start flag and a match flag. The lower bits of both
+/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX`
+/// mask).
+///
+/// The start flag means that the state is a start state, and therefore may be
+/// subject to special prefix scanning optimizations.
+///
+/// The match flag means that the state is a match state, and therefore the
+/// current position in the input (while searching) should be recorded.
+///
+/// The above exists mostly in the service of making the inner loop fast.
+/// In particular, the inner *inner* loop looks something like this:
+///
+/// ```ignore
+/// while state <= STATE_MAX and i < len(text):
+///     state = state.next[i]
+/// ```
+///
+/// This is nice because it lets us execute a lazy DFA as if it were an
+/// entirely offline DFA (i.e., with very few instructions). The loop will
+/// quit only when we need to examine a case that needs special attention.
+type StatePtr = u32;
+
+/// An unknown state means that the state has not been computed yet, and that
+/// the only way to progress is to compute it.
+const STATE_UNKNOWN: StatePtr = 1 << 31;
+
+/// A dead state means that the state has been computed and it is known that
+/// once it is entered, no future match can ever occur.
+const STATE_DEAD: StatePtr = STATE_UNKNOWN + 1;
+
+/// A quit state means that the DFA came across some input that it doesn't
+/// know how to process correctly. The DFA should quit and another matching
+/// engine should be run in its place.
+const STATE_QUIT: StatePtr = STATE_DEAD + 1;
+
+/// A start state is a state that the DFA can start in.
+///
+/// Note that start states have their lower bits set to a state pointer.
+const STATE_START: StatePtr = 1 << 30;
+
+/// A match state means that the regex has successfully matched.
+///
+/// Note that match states have their lower bits set to a state pointer.
+const STATE_MATCH: StatePtr = 1 << 29;
+
+/// The maximum state pointer. This is useful to mask out the "valid" state
+/// pointer from a state with the "start" or "match" bits set.
+///
+/// It doesn't make sense to use this with unknown, dead or quit state
+/// pointers, since those pointers are sentinels and never have their lower
+/// bits set to anything meaningful.
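+///
+/// Illustrative use, mirroring what the search loops below do:
+///
+/// ```ignore
+/// let row = si & STATE_MAX;              // strip the start/match flag bits
+/// let is_match = si & STATE_MATCH > 0;   // test the match flag
+/// let is_start = si & STATE_START > 0;   // test the start flag
+/// ```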
+const STATE_MAX: StatePtr = STATE_MATCH - 1;
+
+/// Byte is a u8 in spirit, but a u16 in practice so that we can represent the
+/// special EOF sentinel value.
+#[derive(Copy, Clone, Debug)]
+struct Byte(u16);
+
+/// A set of flags for zero-width assertions.
+#[derive(Clone, Copy, Eq, Debug, Default, Hash, PartialEq)]
+struct EmptyFlags {
+    start: bool,
+    end: bool,
+    start_line: bool,
+    end_line: bool,
+    word_boundary: bool,
+    not_word_boundary: bool,
+}
+
+/// A set of flags describing various configurations of a DFA state. This is
+/// represented by a `u8` so that it is compact.
+#[derive(Clone, Copy, Eq, Default, Hash, PartialEq)]
+struct StateFlags(u8);
+
+impl Cache {
+    /// Create new empty cache for the DFA engine.
+    pub fn new(prog: &Program) -> Self {
+        // We add 1 to account for the special EOF byte.
+        let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1;
+        let starts = vec![STATE_UNKNOWN; 256];
+        let mut cache = Cache {
+            inner: CacheInner {
+                compiled: StateMap::new(num_byte_classes),
+                trans: Transitions::new(num_byte_classes),
+                start_states: starts,
+                stack: vec![],
+                flush_count: 0,
+                size: 0,
+                insts_scratch_space: vec![],
+            },
+            qcur: SparseSet::new(prog.insts.len()),
+            qnext: SparseSet::new(prog.insts.len()),
+        };
+        cache.inner.reset_size();
+        cache
+    }
+}
+
+impl CacheInner {
+    /// Resets the cache size to account for fixed costs, such as the program
+    /// and stack sizes.
+    fn reset_size(&mut self) {
+        self.size = (self.start_states.len() * mem::size_of::<StatePtr>())
+            + (self.stack.len() * mem::size_of::<InstPtr>());
+    }
+}
+
+impl<'a> Fsm<'a> {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn forward(
+        prog: &'a Program,
+        cache: &ProgramCache,
+        quit_after_match: bool,
+        text: &[u8],
+        at: usize,
+    ) -> Result<usize> {
+        let mut cache = cache.borrow_mut();
+        let cache = &mut cache.dfa;
+        let mut dfa = Fsm {
+            prog: prog,
+            start: 0, // filled in below
+            at: at,
+            quit_after_match: quit_after_match,
+            last_match_si: STATE_UNKNOWN,
+            last_cache_flush: at,
+            cache: &mut cache.inner,
+        };
+        let (empty_flags, state_flags) = dfa.start_flags(text, at);
+        dfa.start =
+            match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) {
+                None => return Result::Quit,
+                Some(STATE_DEAD) => return Result::NoMatch(at),
+                Some(si) => si,
+            };
+        debug_assert!(dfa.start != STATE_UNKNOWN);
+        dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn reverse(
+        prog: &'a Program,
+        cache: &ProgramCache,
+        quit_after_match: bool,
+        text: &[u8],
+        at: usize,
+    ) -> Result<usize> {
+        let mut cache = cache.borrow_mut();
+        let cache = &mut cache.dfa_reverse;
+        let mut dfa = Fsm {
+            prog: prog,
+            start: 0, // filled in below
+            at: at,
+            quit_after_match: quit_after_match,
+            last_match_si: STATE_UNKNOWN,
+            last_cache_flush: at,
+            cache: &mut cache.inner,
+        };
+        let (empty_flags, state_flags) = dfa.start_flags_reverse(text, at);
+        dfa.start =
+            match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) {
+                None => return Result::Quit,
+                Some(STATE_DEAD) => return Result::NoMatch(at),
+                Some(si) => si,
+            };
+        debug_assert!(dfa.start != STATE_UNKNOWN);
+        dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn forward_many(
+        prog: &'a Program,
+        cache: &ProgramCache,
+        matches: &mut [bool],
+        text: &[u8],
+        at: usize,
+    ) -> Result<usize> {
+        debug_assert!(matches.len() == prog.matches.len());
+        let mut cache = cache.borrow_mut();
+        let cache = &mut cache.dfa;
+        let mut dfa = Fsm {
+            prog: prog,
+            start: 0, // filled in below
+            at: at,
+            quit_after_match: false,
+            last_match_si: STATE_UNKNOWN,
+            last_cache_flush: at,
+            cache: &mut cache.inner,
+        };
+        let (empty_flags, state_flags) = dfa.start_flags(text, at);
+        dfa.start =
+            match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) {
+                None => return Result::Quit,
+                Some(STATE_DEAD) => return Result::NoMatch(at),
+                Some(si) => si,
+            };
+        debug_assert!(dfa.start != STATE_UNKNOWN);
+        let result = dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text);
+        if result.is_match() {
+            if matches.len() == 1 {
+                matches[0] = true;
+            } else {
+                debug_assert!(dfa.last_match_si != STATE_UNKNOWN);
+                debug_assert!(dfa.last_match_si != STATE_DEAD);
+                for ip in dfa.state(dfa.last_match_si).inst_ptrs() {
+                    if let Inst::Match(slot) = dfa.prog[ip] {
+                        matches[slot] = true;
+                    }
+                }
+            }
+        }
+        result
+    }
+
+    /// Executes the DFA on a forward NFA.
+    ///
+    /// {qcur,qnext} are scratch ordered sets which may be non-empty.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn exec_at(
+        &mut self,
+        qcur: &mut SparseSet,
+        qnext: &mut SparseSet,
+        text: &[u8],
+    ) -> Result<usize> {
+        // For the most part, the DFA is basically:
+        //
+        //   last_match = null
+        //   while current_byte != EOF:
+        //     si = current_state.next[current_byte]
+        //     if si is match
+        //       last_match = si
+        //   return last_match
+        //
+        // However, we need to deal with a few things:
+        //
+        //   1. This is an *online* DFA, so the current state's next list
+        //      may not point to anywhere yet, so we must go out and compute
+        //      them. (They are then cached into the current state's next list
+        //      to avoid re-computation.)
+        //   2. If we come across a state that is known to be dead (i.e., never
+        //      leads to a match), then we can quit early.
+        //   3. If the caller just wants to know if a match occurs, then we
+        //      can quit as soon as we know we have a match. (Full leftmost
+        //      first semantics require continuing on.)
+        //   4. If we're in the start state, then we can use a pre-computed set
+        //      of prefix literals to skip quickly along the input.
+        //   5. After the input is exhausted, we run the DFA on one symbol
+        //      that stands for EOF. This is useful for handling empty width
+        //      assertions.
+        //   6. We can't actually do state.next[byte]. Instead, we have to do
+        //      state.next[byte_classes[byte]], which permits us to keep the
+        //      'next' list very small.
+        //
+        // Since there's a bunch of extra stuff we need to consider, we do some
+        // pretty hairy tricks to get the inner loop to run as fast as
+        // possible.
+        debug_assert!(!self.prog.is_reverse);
+
+        // The last match is the currently known ending match position. It is
+        // reported as an index to the most recent byte that resulted in a
+        // transition to a match state and is always stored in capture slot `1`
+        // when searching forwards. Its maximum value is `text.len()`.
+        let mut result = Result::NoMatch(self.at);
+        let (mut prev_si, mut next_si) = (self.start, self.start);
+        let mut at = self.at;
+        while at < text.len() {
+            // This is the real inner loop. We take advantage of special bits
+            // set in the state pointer to determine whether a state is in the
+            // "common" case or not. Specifically, the common case is a
+            // non-match non-start non-dead state that has already been
+            // computed. So long as we remain in the common case, this inner
+            // loop will chew through the input.
+            //
+            // We also unroll the loop 4 times to amortize the cost of checking
+            // whether we've consumed the entire input. We are also careful
+            // to make sure that `prev_si` always represents the previous state
+            // and `next_si` always represents the next state after the loop
+            // exits, even if it isn't always true inside the loop.
+            while next_si <= STATE_MAX && at < text.len() {
+                // Argument for safety is in the definition of next_si.
+                prev_si = unsafe { self.next_si(next_si, text, at) };
+                at += 1;
+                if prev_si > STATE_MAX || at + 2 >= text.len() {
+                    mem::swap(&mut prev_si, &mut next_si);
+                    break;
+                }
+                next_si = unsafe { self.next_si(prev_si, text, at) };
+                at += 1;
+                if next_si > STATE_MAX {
+                    break;
+                }
+                prev_si = unsafe { self.next_si(next_si, text, at) };
+                at += 1;
+                if prev_si > STATE_MAX {
+                    mem::swap(&mut prev_si, &mut next_si);
+                    break;
+                }
+                next_si = unsafe { self.next_si(prev_si, text, at) };
+                at += 1;
+            }
+            if next_si & STATE_MATCH > 0 {
+                // A match state is outside of the common case because it needs
+                // special case analysis. In particular, we need to record the
+                // last position as having matched and possibly quit the DFA if
+                // we don't need to keep matching.
+                next_si &= !STATE_MATCH;
+                result = Result::Match(at - 1);
+                if self.quit_after_match {
+                    return result;
+                }
+                self.last_match_si = next_si;
+                prev_si = next_si;
+
+                // This permits short-circuiting when matching a regex set.
+                // In particular, if this DFA state contains only match states,
+                // then it's impossible to extend the set of matches since
+                // match states are final. Therefore, we can quit.
+                if self.prog.matches.len() > 1 {
+                    let state = self.state(next_si);
+                    let just_matches =
+                        state.inst_ptrs().all(|ip| self.prog[ip].is_match());
+                    if just_matches {
+                        return result;
+                    }
+                }
+
+                // Another inner loop! If the DFA stays in this particular
+                // match state, then we can rip through all of the input
+                // very quickly, recording the match location only once
+                // we've left this particular state.
+                let cur = at;
+                while (next_si & !STATE_MATCH) == prev_si
+                    && at + 2 < text.len()
+                {
+                    // Argument for safety is in the definition of next_si.
+                    next_si = unsafe {
+                        self.next_si(next_si & !STATE_MATCH, text, at)
+                    };
+                    at += 1;
+                }
+                if at > cur {
+                    result = Result::Match(at - 2);
+                }
+            } else if next_si & STATE_START > 0 {
+                // A start state isn't in the common case because we may
+                // want to do quick prefix scanning. If the program doesn't
+                // have a detected prefix, then start states are actually
+                // considered common and this case is never reached.
+                debug_assert!(self.has_prefix());
+                next_si &= !STATE_START;
+                prev_si = next_si;
+                at = match self.prefix_at(text, at) {
+                    None => return Result::NoMatch(text.len()),
+                    Some(i) => i,
+                };
+            } else if next_si >= STATE_UNKNOWN {
+                if next_si == STATE_QUIT {
+                    return Result::Quit;
+                }
+                // Finally, this corresponds to the case where the transition
+                // entered a state that can never lead to a match or a state
+                // that hasn't been computed yet. The latter being the "slow"
+                // path.
+                let byte = Byte::byte(text[at - 1]);
+                // We no longer care about the special bits in the state
+                // pointer.
+                prev_si &= STATE_MAX;
+                // Record where we are. This is used to track progress for
+                // determining whether we should quit if we've flushed the
+                // cache too much.
+                self.at = at;
+                next_si = match self.next_state(qcur, qnext, prev_si, byte) {
+                    None => return Result::Quit,
+                    Some(STATE_DEAD) => return result.set_non_match(at),
+                    Some(si) => si,
+                };
+                debug_assert!(next_si != STATE_UNKNOWN);
+                if next_si & STATE_MATCH > 0 {
+                    next_si &= !STATE_MATCH;
+                    result = Result::Match(at - 1);
+                    if self.quit_after_match {
+                        return result;
+                    }
+                    self.last_match_si = next_si;
+                }
+                prev_si = next_si;
+            } else {
+                prev_si = next_si;
+            }
+        }
+
+        // Run the DFA once more on the special EOF sentinel value.
+        // We don't care about the special bits in the state pointer any more,
+        // so get rid of them.
+        prev_si &= STATE_MAX;
+        prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
+            None => return Result::Quit,
+            Some(STATE_DEAD) => return result.set_non_match(text.len()),
+            Some(si) => si & !STATE_START,
+        };
+        debug_assert!(prev_si != STATE_UNKNOWN);
+        if prev_si & STATE_MATCH > 0 {
+            prev_si &= !STATE_MATCH;
+            self.last_match_si = prev_si;
+            result = Result::Match(text.len());
+        }
+        result
+    }
+
+    /// Executes the DFA on a reverse NFA.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn exec_at_reverse(
+        &mut self,
+        qcur: &mut SparseSet,
+        qnext: &mut SparseSet,
+        text: &[u8],
+    ) -> Result<usize> {
+        // The comments in `exec_at` above mostly apply here too. The main
+        // difference is that we move backwards over the input and we look for
+        // the longest possible match instead of the leftmost-first match.
+        //
+        // N.B. The code duplication here is regrettable. Efforts to improve
+        // it without sacrificing performance are welcome. ---AG
+        debug_assert!(self.prog.is_reverse);
+        let mut result = Result::NoMatch(self.at);
+        let (mut prev_si, mut next_si) = (self.start, self.start);
+        let mut at = self.at;
+        while at > 0 {
+            while next_si <= STATE_MAX && at > 0 {
+                // Argument for safety is in the definition of next_si.
+                at -= 1;
+                prev_si = unsafe { self.next_si(next_si, text, at) };
+                if prev_si > STATE_MAX || at <= 4 {
+                    mem::swap(&mut prev_si, &mut next_si);
+                    break;
+                }
+                at -= 1;
+                next_si = unsafe { self.next_si(prev_si, text, at) };
+                if next_si > STATE_MAX {
+                    break;
+                }
+                at -= 1;
+                prev_si = unsafe { self.next_si(next_si, text, at) };
+                if prev_si > STATE_MAX {
+                    mem::swap(&mut prev_si, &mut next_si);
+                    break;
+                }
+                at -= 1;
+                next_si = unsafe { self.next_si(prev_si, text, at) };
+            }
+            if next_si & STATE_MATCH > 0 {
+                next_si &= !STATE_MATCH;
+                result = Result::Match(at + 1);
+                if self.quit_after_match {
+                    return result;
+                }
+                self.last_match_si = next_si;
+                prev_si = next_si;
+                let cur = at;
+                while (next_si & !STATE_MATCH) == prev_si && at >= 2 {
+                    // Argument for safety is in the definition of next_si.
+                    at -= 1;
+                    next_si = unsafe {
+                        self.next_si(next_si & !STATE_MATCH, text, at)
+                    };
+                }
+                if at < cur {
+                    result = Result::Match(at + 2);
+                }
+            } else if next_si >= STATE_UNKNOWN {
+                if next_si == STATE_QUIT {
+                    return Result::Quit;
+                }
+                let byte = Byte::byte(text[at]);
+                prev_si &= STATE_MAX;
+                self.at = at;
+                next_si = match self.next_state(qcur, qnext, prev_si, byte) {
+                    None => return Result::Quit,
+                    Some(STATE_DEAD) => return result.set_non_match(at),
+                    Some(si) => si,
+                };
+                debug_assert!(next_si != STATE_UNKNOWN);
+                if next_si & STATE_MATCH > 0 {
+                    next_si &= !STATE_MATCH;
+                    result = Result::Match(at + 1);
+                    if self.quit_after_match {
+                        return result;
+                    }
+                    self.last_match_si = next_si;
+                }
+                prev_si = next_si;
+            } else {
+                prev_si = next_si;
+            }
+        }
+
+        // Run the DFA once more on the special EOF sentinel value.
+        prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
+            None => return Result::Quit,
+            Some(STATE_DEAD) => return result.set_non_match(0),
+            Some(si) => si,
+        };
+        debug_assert!(prev_si != STATE_UNKNOWN);
+        if prev_si & STATE_MATCH > 0 {
+            prev_si &= !STATE_MATCH;
+            self.last_match_si = prev_si;
+            result = Result::Match(0);
+        }
+        result
+    }
+
+    /// next_si transitions to the next state, where the transition input
+    /// corresponds to text[i].
+    ///
+    /// This elides bounds checks, and is therefore unsafe.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr {
+        // What is the argument for safety here?
+        // We have three unchecked accesses that could possibly violate safety:
+        //
+        //   1. The given byte of input (`text[i]`).
+        //   2. The class of the byte of input (`classes[text[i]]`).
+        //   3. The transition for the class (`trans[si + cls]`).
+        //
+        // (1) is only safe when calling next_si is guarded by
+        // `i < text.len()`.
+        //
+        // (2) is the easiest case to guarantee since `text[i]` is always a
+        // `u8` and `self.prog.byte_classes` always has 256 (`u8::MAX + 1`)
+        // entries.
+        // (See `ByteClassSet.byte_classes` in `compile.rs`.)
+        //
+        // (3) is only safe if (1)+(2) are safe. Namely, the transitions
+        // of every state are defined to have length equal to the number of
+        // byte classes in the program. Therefore, a valid class leads to a
+        // valid transition. (All possible transitions are valid lookups, even
+        // if it points to a state that hasn't been computed yet.) (3) also
+        // relies on `si` being correct, but StatePtrs should only ever be
+        // retrieved from the transition table, which ensures they are correct.
+        debug_assert!(i < text.len());
+        let b = *text.get_unchecked(i);
+        debug_assert!((b as usize) < self.prog.byte_classes.len());
+        let cls = *self.prog.byte_classes.get_unchecked(b as usize);
+        self.cache.trans.next_unchecked(si, cls as usize)
+    }
+
+    /// Computes the next state given the current state and the current input
+    /// byte (which may be EOF).
+    ///
+    /// If STATE_DEAD is returned, then there is no valid state transition.
+    /// This implies that no permutation of future input can lead to a match
+    /// state.
+    ///
+    /// STATE_UNKNOWN can never be returned.
+    fn exec_byte(
+        &mut self,
+        qcur: &mut SparseSet,
+        qnext: &mut SparseSet,
+        mut si: StatePtr,
+        b: Byte,
+    ) -> Option<StatePtr> {
+        use prog::Inst::*;
+
+        // Initialize a queue with the current DFA state's NFA states.
+        qcur.clear();
+        for ip in self.state(si).inst_ptrs() {
+            qcur.insert(ip);
+        }
+
+        // Before inspecting the current byte, we may need to also inspect
+        // whether the position immediately preceding the current byte
+        // satisfies the empty assertions found in the current state.
+        //
+        // We only need to do this step if there are any empty assertions in
+        // the current state.
+        let is_word_last = self.state(si).flags().is_word();
+        let is_word = b.is_ascii_word();
+        if self.state(si).flags().has_empty() {
+            // Compute the flags immediately preceding the current byte.
+            // This means we only care about the "end" or "end line" flags.
+            // (The "start" flags are computed immediately proceding the
+            // current byte and is handled below.)
+            let mut flags = EmptyFlags::default();
+            if b.is_eof() {
+                flags.end = true;
+                flags.end_line = true;
+            } else if b.as_byte().map_or(false, |b| b == b'\n') {
+                flags.end_line = true;
+            }
+            if is_word_last == is_word {
+                flags.not_word_boundary = true;
+            } else {
+                flags.word_boundary = true;
+            }
+            // Now follow epsilon transitions from every NFA state, but make
+            // sure we only follow transitions that satisfy our flags.
+            qnext.clear();
+            for &ip in &*qcur {
+                self.follow_epsilons(usize_to_u32(ip), qnext, flags);
+            }
+            mem::swap(qcur, qnext);
+        }
+
+        // Now we set flags for immediately after the current byte. Since start
+        // states are processed separately, and are the only states that can
+        // have the StartText flag set, we therefore only need to worry about
+        // the StartLine flag here.
+        //
+        // We also keep track of whether this DFA state contains an NFA state
+        // that is a matching state. This is precisely how we delay the DFA
+        // matching by one byte in order to process the special EOF sentinel
+        // byte. Namely, if this DFA state contains a matching NFA state,
+        // then it is the *next* DFA state that is marked as a match.
+        let mut empty_flags = EmptyFlags::default();
+        let mut state_flags = StateFlags::default();
+        empty_flags.start_line = b.as_byte().map_or(false, |b| b == b'\n');
+        if b.is_ascii_word() {
+            state_flags.set_word();
+        }
+        // Now follow all epsilon transitions again, but only after consuming
+        // the current byte.
+        qnext.clear();
+        for &ip in &*qcur {
+            match self.prog[ip as usize] {
+                // These states never happen in a byte-based program.
+                Char(_) | Ranges(_) => unreachable!(),
+                // These states are handled when following epsilon transitions.
+                Save(_) | Split(_) | EmptyLook(_) => {}
+                Match(_) => {
+                    state_flags.set_match();
+                    if !self.continue_past_first_match() {
+                        break;
+                    } else if self.prog.matches.len() > 1
+                        && !qnext.contains(ip as usize)
+                    {
+                        // If we are continuing on to find other matches,
+                        // then keep a record of the match states we've seen.
+                        qnext.insert(ip);
+                    }
+                }
+                Bytes(ref inst) => {
+                    if b.as_byte().map_or(false, |b| inst.matches(b)) {
+                        self.follow_epsilons(
+                            inst.goto as InstPtr,
+                            qnext,
+                            empty_flags,
+                        );
+                    }
+                }
+            }
+        }
+
+        let cache = if b.is_eof() && self.prog.matches.len() > 1 {
+            // If we're processing the last byte of the input and we're
+            // matching a regex set, then make the next state contain the
+        // previous state's transitions. We do this so that the main
+            // matching loop can extract all of the match instructions.
+            mem::swap(qcur, qnext);
+            // And don't cache this state because it's totally bunk.
+            false
+        } else {
+            true
+        };
+
+        // We've now built up the set of NFA states that ought to comprise the
+        // next DFA state, so try to find it in the cache, and if it doesn't
+        // exist, cache it.
+        //
+        // N.B. We pass `&mut si` here because the cache may clear itself if
+        // it has gotten too full. When that happens, the location of the
+        // current state may change.
+        let mut next =
+            match self.cached_state(qnext, state_flags, Some(&mut si)) {
+                None => return None,
+                Some(next) => next,
+            };
+        if (self.start & !STATE_START) == next {
+            // Start states can never be match states since all matches are
+            // delayed by one byte.
+            debug_assert!(!self.state(next).flags().is_match());
+            next = self.start_ptr(next);
+        }
+        if next <= STATE_MAX && self.state(next).flags().is_match() {
+            next |= STATE_MATCH;
+        }
+        debug_assert!(next != STATE_UNKNOWN);
+        // And now store our state in the current state's next list.
+        if cache {
+            let cls = self.byte_class(b);
+            self.cache.trans.set_next(si, cls, next);
+        }
+        Some(next)
+    }
+
+    /// Follows the epsilon transitions starting at (and including) `ip`. The
+    /// resulting states are inserted into the ordered set `q`.
+    ///
+    /// Conditional epsilon transitions (i.e., empty width assertions) are only
+    /// followed if they are satisfied by the given flags, which should
+    /// represent the flags set at the current location in the input.
+    ///
+    /// If the current location corresponds to the empty string, then only the
+    /// end line and/or end text flags may be set. If the current location
+    /// corresponds to a real byte in the input, then only the start line
+    /// and/or start text flags may be set.
+    ///
+    /// As an exception to the above, when finding the initial state, any of
+    /// the above flags may be set:
+    ///
+    /// If matching starts at the beginning of the input, then start text and
+    /// start line should be set. If the input is empty, then end text and end
+    /// line should also be set.
+    ///
+    /// If matching starts after the beginning of the input, then only start
+    /// line should be set if the preceding byte is `\n`. End line should never
+    /// be set in this case. (Even if the following byte is a `\n`, it will
+    /// be handled in a subsequent DFA state.)
+    fn follow_epsilons(
+        &mut self,
+        ip: InstPtr,
+        q: &mut SparseSet,
+        flags: EmptyFlags,
+    ) {
+        use prog::EmptyLook::*;
+        use prog::Inst::*;
+
+        // We need to traverse the NFA to follow epsilon transitions, so avoid
+        // recursion with an explicit stack.
+        self.cache.stack.push(ip);
+        while let Some(mut ip) = self.cache.stack.pop() {
+            // Try to munch through as many states as possible without
+            // pushes/pops to the stack.
+            loop {
+                // Don't visit states we've already added.
+                if q.contains(ip as usize) {
+                    break;
+                }
+                q.insert(ip as usize);
+                match self.prog[ip as usize] {
+                    Char(_) | Ranges(_) => unreachable!(),
+                    Match(_) | Bytes(_) => {
+                        break;
+                    }
+                    EmptyLook(ref inst) => {
+                        // Only follow empty assertion states if our flags
+                        // satisfy the assertion.
+                        match inst.look {
+                            StartLine if flags.start_line => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            EndLine if flags.end_line => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            StartText if flags.start => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            EndText if flags.end => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            WordBoundaryAscii if flags.word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            NotWordBoundaryAscii
+                                if flags.not_word_boundary =>
+                            {
+                                ip = inst.goto as InstPtr;
+                            }
+                            WordBoundary if flags.word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            NotWordBoundary if flags.not_word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            StartLine | EndLine | StartText | EndText
+                            | WordBoundaryAscii | NotWordBoundaryAscii
+                            | WordBoundary | NotWordBoundary => {
+                                break;
+                            }
+                        }
+                    }
+                    Save(ref inst) => {
+                        ip = inst.goto as InstPtr;
+                    }
+                    Split(ref inst) => {
+                        self.cache.stack.push(inst.goto2 as InstPtr);
+                        ip = inst.goto1 as InstPtr;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Find a previously computed state matching the given set of instructions
+    /// and is_match bool.
+    ///
+    /// The given set of instructions should represent a single state in the
+    /// NFA along with all states reachable without consuming any input.
+    ///
+    /// The is_match bool should be true if and only if the preceding DFA state
+    /// contains an NFA matching state. The cached state produced here will
+    /// then signify a match. (This enables us to delay a match by one byte,
+    /// in order to account for the EOF sentinel byte.)
+    ///
+    /// If the cache is full, then it is wiped before caching a new state.
+    ///
+    /// The current state should be specified if it exists, since it will need
+    /// to be preserved if the cache clears itself. (Start states are
+    /// always saved, so they should not be passed here.) It takes a mutable
+    /// pointer to the index because if the cache is cleared, the state's
+    /// location may change.
+    fn cached_state(
+        &mut self,
+        q: &SparseSet,
+        mut state_flags: StateFlags,
+        current_state: Option<&mut StatePtr>,
+    ) -> Option<StatePtr> {
+        // If we couldn't come up with a non-empty key to represent this state,
+        // then it is dead and can never lead to a match.
+        //
+        // Note that inst_flags represent the set of empty width assertions
+        // in q. We use this as an optimization in exec_byte to determine when
+        // we should follow epsilon transitions at the empty string preceding
+        // the current byte.
+        let key = match self.cached_state_key(q, &mut state_flags) {
+            None => return Some(STATE_DEAD),
+            Some(v) => v,
+        };
+        // In the cache? Cool. Done.
+        if let Some(si) = self.cache.compiled.get_ptr(&key) {
+            return Some(si);
+        }
+        // If the cache has gotten too big, wipe it.
+        if self.approximate_size() > self.prog.dfa_size_limit
+            && !self.clear_cache_and_save(current_state)
+        {
+            // Oops. The DFA is giving up.
+            return None;
+        }
+        // Allocate room for our state and add it.
+        self.add_state(key)
+    }
+
+    /// Produces a key suitable for describing a state in the DFA cache.
+    ///
+    /// The key invariant here is that equivalent keys are produced for any two
+    /// sets of ordered NFA states (and toggling of whether the previous NFA
+    /// states contain a match state) that do not discriminate a match for any
+    /// input.
+    ///
+    /// Specifically, q should be an ordered set of NFA states and is_match
+    /// should be true if and only if the previous NFA states contained a match
+    /// state.
+    fn cached_state_key(
+        &mut self,
+        q: &SparseSet,
+        state_flags: &mut StateFlags,
+    ) -> Option<State> {
+        use prog::Inst::*;
+
+        // We need to build up enough information to recognize pre-built states
+        // in the DFA. Generally speaking, this includes every instruction
+        // except for those which are purely epsilon transitions, e.g., the
+        // Save and Split instructions.
+        //
+        // Empty width assertions are also epsilon transitions, but since they
+        // are conditional, we need to make them part of a state's key in the
+        // cache.
+
+        let mut insts =
+            mem::replace(&mut self.cache.insts_scratch_space, vec![]);
+        insts.clear();
+        // Reserve 1 byte for flags.
+        insts.push(0);
+
+        let mut prev = 0;
+        for &ip in q {
+            let ip = usize_to_u32(ip);
+            match self.prog[ip as usize] {
+                Char(_) | Ranges(_) => unreachable!(),
+                Save(_) | Split(_) => {}
+                Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip),
+                EmptyLook(_) => {
+                    state_flags.set_empty();
+                    push_inst_ptr(&mut insts, &mut prev, ip)
+                }
+                Match(_) => {
+                    push_inst_ptr(&mut insts, &mut prev, ip);
+                    if !self.continue_past_first_match() {
+                        break;
+                    }
+                }
+            }
+        }
+        // If we couldn't transition to any other instructions and we didn't
+        // see a match when expanding NFA states previously, then this is a
+        // dead state and no amount of additional input can transition out
+        // of this state.
+        let opt_state = if insts.len() == 1 && !state_flags.is_match() {
+            None
+        } else {
+            let StateFlags(f) = *state_flags;
+            insts[0] = f;
+            Some(State { data: Arc::from(&*insts) })
+        };
+        self.cache.insts_scratch_space = insts;
+        opt_state
+    }
+
+    /// Clears the cache, but saves and restores current_state if it is not
+    /// none.
+    ///
+    /// The current state must be provided here in case its location in the
+    /// cache changes.
+    ///
+    /// This returns false if the cache is not cleared and the DFA should
+    /// give up.
+    fn clear_cache_and_save(
+        &mut self,
+        current_state: Option<&mut StatePtr>,
+    ) -> bool {
+        if self.cache.compiled.is_empty() {
+            // Nothing to clear...
+            return true;
+        }
+        match current_state {
+            None => self.clear_cache(),
+            Some(si) => {
+                let cur = self.state(*si).clone();
+                if !self.clear_cache() {
+                    return false;
+                }
+                // The unwrap is OK because we just cleared the cache and
+                // therefore know that the next state pointer won't exceed
+                // STATE_MAX.
+                *si = self.restore_state(cur).unwrap();
+                true
+            }
+        }
+    }
+
+    /// Wipes the state cache, but saves and restores the current start state.
+    ///
+    /// This returns false if the cache is not cleared and the DFA should
+    /// give up.
+    fn clear_cache(&mut self) -> bool {
+        // Bail out of the DFA if we're moving too "slowly."
+        // A heuristic from RE2: assume the DFA is too slow if it is processing
+        // 10 or fewer bytes per state.
+        // Additionally, we permit the cache to be flushed a few times before
+        // calling it quits.
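+        // For example (illustrative): with 40 cached states, advancing only
+        // 300 bytes since the last flush (300 <= 10 * 40) after three or more
+        // prior flushes causes the DFA to give up here.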
+        let nstates = self.cache.compiled.len();
+        if self.cache.flush_count >= 3
+            && self.at >= self.last_cache_flush
+            && (self.at - self.last_cache_flush) <= 10 * nstates
+        {
+            return false;
+        }
+        // Update statistics tracking cache flushes.
+        self.last_cache_flush = self.at;
+        self.cache.flush_count += 1;
+
+        // OK, actually flush the cache.
+        let start = self.state(self.start & !STATE_START).clone();
+        let last_match = if self.last_match_si <= STATE_MAX {
+            Some(self.state(self.last_match_si).clone())
+        } else {
+            None
+        };
+        self.cache.reset_size();
+        self.cache.trans.clear();
+        self.cache.compiled.clear();
+        for s in &mut self.cache.start_states {
+            *s = STATE_UNKNOWN;
+        }
+        // The unwraps are OK because we just cleared the cache and therefore
+        // know that the next state pointer won't exceed STATE_MAX.
+        let start_ptr = self.restore_state(start).unwrap();
+        self.start = self.start_ptr(start_ptr);
+        if let Some(last_match) = last_match {
+            self.last_match_si = self.restore_state(last_match).unwrap();
+        }
+        true
+    }
+
+    /// Restores the given state back into the cache, and returns a pointer
+    /// to it.
+    fn restore_state(&mut self, state: State) -> Option<StatePtr> {
+        // If we've already stored this state, just return a pointer to it.
+        // None will be the wiser.
+        if let Some(si) = self.cache.compiled.get_ptr(&state) {
+            return Some(si);
+        }
+        self.add_state(state)
+    }
+
+    /// Returns the next state given the current state si and current byte
+    /// b. {qcur,qnext} are used as scratch space for storing ordered NFA
+    /// states.
+    ///
+    /// This tries to fetch the next state from the cache, but if that fails,
+    /// it computes the next state, caches it and returns a pointer to it.
+    ///
+    /// The pointer can be to a real state, or it can be STATE_DEAD.
+    /// STATE_UNKNOWN cannot be returned.
+    ///
+    /// None is returned if a new state could not be allocated (i.e., the DFA
+    /// ran out of space and thinks it's running too slowly).
+    fn next_state(
+        &mut self,
+        qcur: &mut SparseSet,
+        qnext: &mut SparseSet,
+        si: StatePtr,
+        b: Byte,
+    ) -> Option<StatePtr> {
+        if si == STATE_DEAD {
+            return Some(STATE_DEAD);
+        }
+        match self.cache.trans.next(si, self.byte_class(b)) {
+            STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b),
+            STATE_QUIT => None,
+            STATE_DEAD => Some(STATE_DEAD),
+            nsi => Some(nsi),
+        }
+    }
+
+    /// Computes and returns the start state, where searching begins at
+    /// position `at` in `text`. If the state has already been computed,
+    /// then it is pulled from the cache. If the state hasn't been cached,
+    /// then it is computed, cached and a pointer to it is returned.
+    ///
+    /// This may return STATE_DEAD but never STATE_UNKNOWN.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn start_state(
+        &mut self,
+        q: &mut SparseSet,
+        empty_flags: EmptyFlags,
+        state_flags: StateFlags,
+    ) -> Option<StatePtr> {
+        // Compute an index into our cache of start states based on the set
+        // of empty/state flags set at the current position in the input. We
+        // don't use every flag since not all flags matter. For example, since
+        // matches are delayed by one byte, start states can never be match
+        // states.
+        let flagi = {
+            (((empty_flags.start as u8) << 0)
+                | ((empty_flags.end as u8) << 1)
+                | ((empty_flags.start_line as u8) << 2)
+                | ((empty_flags.end_line as u8) << 3)
+                | ((empty_flags.word_boundary as u8) << 4)
+                | ((empty_flags.not_word_boundary as u8) << 5)
+                | ((state_flags.is_word() as u8) << 6)) as usize
+        };
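+        // For example (illustrative): at the very start of the input, both
+        // the `start` and `start_line` empty flags are set, so the index
+        // just computed is (1 << 0) | (1 << 2) = 5. With seven flags
+        // packed, `flagi` always lies in 0..128.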
+        match self.cache.start_states[flagi] {
+            STATE_UNKNOWN => {}
+            STATE_DEAD => return Some(STATE_DEAD),
+            si => return Some(si),
+        }
+        q.clear();
+        let start = usize_to_u32(self.prog.start);
+        self.follow_epsilons(start, q, empty_flags);
+        // Start states can never be match states because we delay every match
+        // by one byte. Given an empty string and an empty match, the match
+        // won't actually occur until the DFA processes the special EOF
+        // sentinel byte.
+        let sp = match self.cached_state(q, state_flags, None) {
+            None => return None,
+            Some(sp) => self.start_ptr(sp),
+        };
+        self.cache.start_states[flagi] = sp;
+        Some(sp)
+    }
+
+    /// Computes the set of starting flags for the given position in text.
+    ///
+    /// This should only be used when executing the DFA forwards over the
+    /// input.
+    fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) {
+        let mut empty_flags = EmptyFlags::default();
+        let mut state_flags = StateFlags::default();
+        empty_flags.start = at == 0;
+        empty_flags.end = text.is_empty();
+        empty_flags.start_line = at == 0 || text[at - 1] == b'\n';
+        empty_flags.end_line = text.is_empty();
+
+        let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
+        let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word();
+        if is_word_last {
+            state_flags.set_word();
+        }
+        if is_word == is_word_last {
+            empty_flags.not_word_boundary = true;
+        } else {
+            empty_flags.word_boundary = true;
+        }
+        (empty_flags, state_flags)
+    }
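+
+    // A worked example of the flags above (illustrative only): for
+    // `text = b"ab cd"` and `at == 2`, the previous byte `b'b'` is an ASCII
+    // word byte while the current byte `b' '` is not, so the state's word
+    // flag is set and `empty_flags.word_boundary` is true. At `at == 1`,
+    // both neighbouring bytes are word bytes, so `not_word_boundary` is set
+    // instead.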
+
+    /// Computes the set of starting flags for the given position in text.
+    ///
+    /// This should only be used when executing the DFA in reverse over the
+    /// input.
+    fn start_flags_reverse(
+        &self,
+        text: &[u8],
+        at: usize,
+    ) -> (EmptyFlags, StateFlags) {
+        let mut empty_flags = EmptyFlags::default();
+        let mut state_flags = StateFlags::default();
+        empty_flags.start = at == text.len();
+        empty_flags.end = text.is_empty();
+        empty_flags.start_line = at == text.len() || text[at] == b'\n';
+        empty_flags.end_line = text.is_empty();
+
+        let is_word_last =
+            at < text.len() && Byte::byte(text[at]).is_ascii_word();
+        let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
+        if is_word_last {
+            state_flags.set_word();
+        }
+        if is_word == is_word_last {
+            empty_flags.not_word_boundary = true;
+        } else {
+            empty_flags.word_boundary = true;
+        }
+        (empty_flags, state_flags)
+    }
+
+    /// Returns a reference to a State given a pointer to it.
+    fn state(&self, si: StatePtr) -> &State {
+        self.cache.compiled.get_state(si).unwrap()
+    }
+
+    /// Adds the given state to the DFA.
+    ///
+    /// This allocates room for transitions out of this state in
+    /// self.cache.trans. The transitions can be set with the returned
+    /// StatePtr.
+    ///
+    /// If None is returned, then the state limit was reached and the DFA
+    /// should quit.
+    fn add_state(&mut self, state: State) -> Option<StatePtr> {
+        // This will fail if the next state pointer exceeds STATE_MAX. In
+        // practice, the cache limit will prevent us from ever getting here,
+        // but maybe callers will set the cache size to something ridiculous...
+        let si = match self.cache.trans.add() {
+            None => return None,
+            Some(si) => si,
+        };
+        // If the program has a Unicode word boundary, then set any transitions
+        // for non-ASCII bytes to STATE_QUIT. If the DFA stumbles over such a
+        // transition, then it will quit and an alternative matching engine
+        // will take over.
+        if self.prog.has_unicode_word_boundary {
+            for b in 128..256 {
+                let cls = self.byte_class(Byte::byte(b as u8));
+                self.cache.trans.set_next(si, cls, STATE_QUIT);
+            }
+        }
+        // Finally, put our actual state on to our heap of states and index it
+        // so we can find it later.
+        self.cache.size += self.cache.trans.state_heap_size()
+            + state.data.len()
+            + (2 * mem::size_of::<State>())
+            + mem::size_of::<StatePtr>();
+        self.cache.compiled.insert(state, si);
+        // Transition table and set of states and map should all be in sync.
+        debug_assert!(
+            self.cache.compiled.len() == self.cache.trans.num_states()
+        );
+        Some(si)
+    }
+
+    /// Quickly finds the next occurrence of any literal prefixes in the regex.
+    /// If there are no literal prefixes, then the current position is
+    /// returned. If there are literal prefixes and one could not be found,
+    /// then None is returned.
+    ///
+    /// This should only be called when the DFA is in a start state.
+    fn prefix_at(&self, text: &[u8], at: usize) -> Option<usize> {
+        self.prog.prefixes.find(&text[at..]).map(|(s, _)| at + s)
+    }
+
+    /// Returns the number of byte classes required to discriminate transitions
+    /// in each state.
+    ///
+    /// Invariant: num_byte_classes() equals the stride of the transition
+    /// table, i.e., the number of transition slots per state.
+    fn num_byte_classes(&self) -> usize {
+        // We add 1 to account for the special EOF byte.
+        (self.prog.byte_classes[255] as usize + 1) + 1
+    }
+
+    /// Given an input byte or the special EOF sentinel, return its
+    /// corresponding byte class.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn byte_class(&self, b: Byte) -> usize {
+        match b.as_byte() {
+            None => self.num_byte_classes() - 1,
+            Some(b) => self.u8_class(b),
+        }
+    }
+
+    /// Like byte_class, but explicitly for u8s.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn u8_class(&self, b: u8) -> usize {
+        self.prog.byte_classes[b as usize] as usize
+    }
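+
+    // For example (illustrative): if the compiled program only distinguishes
+    // four contiguous byte ranges, then `prog.byte_classes[255] == 3`,
+    // `num_byte_classes()` returns 5 (the four ranges plus the EOF
+    // sentinel), and `byte_class(Byte::eof())` maps to the last class, 4.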
+
+    /// Returns true if the DFA should continue searching past the first match.
+    ///
+    /// Leftmost first semantics in the DFA are preserved by not following NFA
+    /// transitions after the first match is seen.
+    ///
+    /// On occasion, we want to avoid leftmost first semantics to find either
+    /// the longest match (for reverse search) or all possible matches (for
+    /// regex sets).
+    fn continue_past_first_match(&self) -> bool {
+        self.prog.is_reverse || self.prog.matches.len() > 1
+    }
+
+    /// Returns true if there is a prefix we can quickly search for.
+    fn has_prefix(&self) -> bool {
+        !self.prog.is_reverse
+            && !self.prog.prefixes.is_empty()
+            && !self.prog.is_anchored_start
+    }
+
+    /// Sets the STATE_START bit in the given state pointer if and only if
+    /// we have a prefix to scan for.
+    ///
+    /// If there's no prefix, then it's a waste to treat the start state
+    /// specially.
+    fn start_ptr(&self, si: StatePtr) -> StatePtr {
+        if self.has_prefix() {
+            si | STATE_START
+        } else {
+            si
+        }
+    }
+
+    /// Returns the approximate heap space currently used by the DFA. It is
+    /// used to determine whether the DFA's state cache needs to
+    /// be wiped. Namely, it is possible that for certain regexes on certain
+    /// inputs, a new state could be created for every byte of input. (This is
+    /// bad for memory use, so we bound it with a cache.)
+    fn approximate_size(&self) -> usize {
+        self.cache.size + self.prog.approximate_size()
+    }
+}
+
+/// An abstraction for representing a map of states. The map supports two
+/// different ways of state lookup. One is fast constant time access via a
+/// state pointer. The other is a hashmap lookup based on the DFA's
+/// constituent NFA states.
+///
+/// A DFA state internally uses an Arc such that we only need to store the
+/// set of NFA states on the heap once, even though we support looking up
+/// states by two different means. A more natural way to express this might
+/// use raw pointers, but an Arc is safe and effectively achieves the same
+/// thing.
+#[derive(Debug)]
+struct StateMap {
+    /// The keys are not actually static but rely on always pointing to a
+    /// buffer in `states` which will never be moved except when clearing
+    /// the map or on drop, in which case the keys of this map will be
+    /// removed before the buffer itself is cleared or dropped.
+    map: HashMap<State, StatePtr>,
+    /// Our set of states. Note that `StatePtr / num_byte_classes` indexes
+    /// this Vec rather than just a `StatePtr`.
+    states: Vec<State>,
+    /// The number of byte classes in the DFA. Used to index `states`.
+    num_byte_classes: usize,
+}
+
+impl StateMap {
+    fn new(num_byte_classes: usize) -> StateMap {
+        StateMap {
+            map: HashMap::new(),
+            states: vec![],
+            num_byte_classes: num_byte_classes,
+        }
+    }
+
+    fn len(&self) -> usize {
+        self.states.len()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.states.is_empty()
+    }
+
+    fn get_ptr(&self, state: &State) -> Option<StatePtr> {
+        self.map.get(state).cloned()
+    }
+
+    fn get_state(&self, si: StatePtr) -> Option<&State> {
+        self.states.get(si as usize / self.num_byte_classes)
+    }
+
+    fn insert(&mut self, state: State, si: StatePtr) {
+        self.map.insert(state.clone(), si);
+        self.states.push(state);
+    }
+
+    fn clear(&mut self) {
+        self.map.clear();
+        self.states.clear();
+    }
+}
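+
+// A minimal sketch of the two lookup paths above (illustrative only): after
+// `insert(state.clone(), si)`, `get_ptr(&state)` answers "has this set of
+// NFA states been compiled before?" via the hash map, while `get_state(si)`
+// answers "which NFA states does this DFA pointer denote?" by indexing
+// `states[si / num_byte_classes]`. The division works because
+// `Transitions::add` hands out pointers in multiples of the stride.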
+
+impl Transitions {
+    /// Create a new transition table.
+    ///
+    /// The number of byte classes corresponds to the stride. Every state will
+    /// have `num_byte_classes` slots for transitions.
+    fn new(num_byte_classes: usize) -> Transitions {
+        Transitions { table: vec![], num_byte_classes: num_byte_classes }
+    }
+
+    /// Returns the total number of states currently in this table.
+    fn num_states(&self) -> usize {
+        self.table.len() / self.num_byte_classes
+    }
+
+    /// Allocates room for one additional state and returns a pointer to it.
+    ///
+    /// If there's no more room, None is returned.
+    fn add(&mut self) -> Option<StatePtr> {
+        let si = self.table.len();
+        if si > STATE_MAX as usize {
+            return None;
+        }
+        self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes));
+        Some(usize_to_u32(si))
+    }
+
+    /// Clears the table of all states.
+    fn clear(&mut self) {
+        self.table.clear();
+    }
+
+    /// Sets the transition from (si, cls) to next.
+    fn set_next(&mut self, si: StatePtr, cls: usize, next: StatePtr) {
+        self.table[si as usize + cls] = next;
+    }
+
+    /// Returns the transition corresponding to (si, cls).
+    fn next(&self, si: StatePtr, cls: usize) -> StatePtr {
+        self.table[si as usize + cls]
+    }
+
+    /// The heap size, in bytes, of a single state in the transition table.
+    fn state_heap_size(&self) -> usize {
+        self.num_byte_classes * mem::size_of::<StatePtr>()
+    }
+
+    /// Like `next`, but uses unchecked access and is therefore unsafe.
+    unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr {
+        debug_assert!((si as usize) < self.table.len());
+        debug_assert!(cls < self.num_byte_classes);
+        *self.table.get_unchecked(si as usize + cls)
+    }
+}
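+
+// Worked example (illustrative): with `num_byte_classes == 4`, successive
+// calls to `add()` return the pointers 0, 4, 8, ... and the row for state 4
+// occupies `table[4..8]`. `next(4, 2)` therefore reads `table[6]`, and
+// `set_next(4, 2, p)` writes the same slot.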
+
+impl StateFlags {
+    fn is_match(&self) -> bool {
+        self.0 & 0b0000000_1 > 0
+    }
+
+    fn set_match(&mut self) {
+        self.0 |= 0b0000000_1;
+    }
+
+    fn is_word(&self) -> bool {
+        self.0 & 0b000000_1_0 > 0
+    }
+
+    fn set_word(&mut self) {
+        self.0 |= 0b000000_1_0;
+    }
+
+    fn has_empty(&self) -> bool {
+        self.0 & 0b00000_1_00 > 0
+    }
+
+    fn set_empty(&mut self) {
+        self.0 |= 0b00000_1_00;
+    }
+}
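+
+// The flags above occupy the low bits of a single `u8`: bit 0 is "match",
+// bit 1 is "word" and bit 2 is "has_empty". For example (illustrative), a
+// match state reached after a word byte carries `StateFlags(0b011)`.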
+
+impl Byte {
+    fn byte(b: u8) -> Self {
+        Byte(b as u16)
+    }
+    fn eof() -> Self {
+        Byte(256)
+    }
+    fn is_eof(&self) -> bool {
+        self.0 == 256
+    }
+
+    fn is_ascii_word(&self) -> bool {
+        let b = match self.as_byte() {
+            None => return false,
+            Some(b) => b,
+        };
+        match b {
+            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' => true,
+            _ => false,
+        }
+    }
+
+    fn as_byte(&self) -> Option<u8> {
+        if self.is_eof() {
+            None
+        } else {
+            Some(self.0 as u8)
+        }
+    }
+}
+
+impl fmt::Debug for State {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let ips: Vec<usize> = self.inst_ptrs().collect();
+        f.debug_struct("State")
+            .field("flags", &self.flags())
+            .field("insts", &ips)
+            .finish()
+    }
+}
+
+impl fmt::Debug for Transitions {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut fmtd = f.debug_map();
+        for si in 0..self.num_states() {
+            let s = si * self.num_byte_classes;
+            let e = s + self.num_byte_classes;
+            fmtd.entry(&si.to_string(), &TransitionsRow(&self.table[s..e]));
+        }
+        fmtd.finish()
+    }
+}
+
+struct TransitionsRow<'a>(&'a [StatePtr]);
+
+impl<'a> fmt::Debug for TransitionsRow<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut fmtd = f.debug_map();
+        for (b, si) in self.0.iter().enumerate() {
+            match *si {
+                STATE_UNKNOWN => {}
+                STATE_DEAD => {
+                    fmtd.entry(&vb(b as usize), &"DEAD");
+                }
+                si => {
+                    fmtd.entry(&vb(b as usize), &si.to_string());
+                }
+            }
+        }
+        fmtd.finish()
+    }
+}
+
+impl fmt::Debug for StateFlags {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("StateFlags")
+            .field("is_match", &self.is_match())
+            .field("is_word", &self.is_word())
+            .field("has_empty", &self.has_empty())
+            .finish()
+    }
+}
+
+/// Helper function for formatting a byte as a nice-to-read escaped string.
+fn vb(b: usize) -> String {
+    use std::ascii::escape_default;
+
+    if b > ::std::u8::MAX as usize {
+        "EOF".to_owned()
+    } else {
+        let escaped = escape_default(b as u8).collect::<Vec<u8>>();
+        String::from_utf8_lossy(&escaped).into_owned()
+    }
+}
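+
+// For example (illustrative): `vb(b'\n' as usize)` yields `"\\n"`, while any
+// value above 255, such as `vb(256)`, is rendered as `"EOF"`.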
+
+fn usize_to_u32(n: usize) -> u32 {
+    if (n as u64) > (::std::u32::MAX as u64) {
+        panic!("BUG: {} is too big to fit into u32", n)
+    }
+    n as u32
+}
+
+#[allow(dead_code)] // useful for debugging
+fn show_state_ptr(si: StatePtr) -> String {
+    let mut s = format!("{:?}", si & STATE_MAX);
+    if si == STATE_UNKNOWN {
+        s = format!("{} (unknown)", s);
+    }
+    if si == STATE_DEAD {
+        s = format!("{} (dead)", s);
+    }
+    if si == STATE_QUIT {
+        s = format!("{} (quit)", s);
+    }
+    if si & STATE_START > 0 {
+        s = format!("{} (start)", s);
+    }
+    if si & STATE_MATCH > 0 {
+        s = format!("{} (match)", s);
+    }
+    s
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_vari32(data: &mut Vec<u8>, n: i32) {
+    let mut un = (n as u32) << 1;
+    if n < 0 {
+        un = !un;
+    }
+    write_varu32(data, un)
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_vari32(data: &[u8]) -> (i32, usize) {
+    let (un, i) = read_varu32(data);
+    let mut n = (un >> 1) as i32;
+    if un & 1 != 0 {
+        n = !n;
+    }
+    (n, i)
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
+    while n >= 0b1000_0000 {
+        data.push((n as u8) | 0b1000_0000);
+        n >>= 7;
+    }
+    data.push(n as u8);
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_varu32(data: &[u8]) -> (u32, usize) {
+    let mut n: u32 = 0;
+    let mut shift: u32 = 0;
+    for (i, &b) in data.iter().enumerate() {
+        if b < 0b1000_0000 {
+            return (n | ((b as u32) << shift), i + 1);
+        }
+        n |= ((b as u32) & 0b0111_1111) << shift;
+        shift += 7;
+    }
+    (0, 0)
+}
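+
+// Worked example (illustrative): `write_varu32` encodes 300 as the two bytes
+// `[0b1010_1100, 0b0000_0010]` (low seven bits first, continuation bit set
+// on every byte but the last), and `read_varu32` on that buffer returns
+// `(300, 2)`. The signed variants zig-zag first: -1 becomes 1 and 1 becomes
+// 2 before the same byte scheme is applied.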
+
+#[cfg(test)]
+mod tests {
+    extern crate rand;
+
+    use super::{
+        push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32,
+        State, StateFlags,
+    };
+    use quickcheck::{quickcheck, QuickCheck, StdGen};
+    use std::sync::Arc;
+
+    #[test]
+    fn prop_state_encode_decode() {
+        fn p(ips: Vec<u32>, flags: u8) -> bool {
+            let mut data = vec![flags];
+            let mut prev = 0;
+            for &ip in ips.iter() {
+                push_inst_ptr(&mut data, &mut prev, ip);
+            }
+            let state = State { data: Arc::from(&data[..]) };
+
+            let expected: Vec<usize> =
+                ips.into_iter().map(|ip| ip as usize).collect();
+            let got: Vec<usize> = state.inst_ptrs().collect();
+            expected == got && state.flags() == StateFlags(flags)
+        }
+        QuickCheck::new()
+            .gen(StdGen::new(self::rand::thread_rng(), 10_000))
+            .quickcheck(p as fn(Vec<u32>, u8) -> bool);
+    }
+
+    #[test]
+    fn prop_read_write_u32() {
+        fn p(n: u32) -> bool {
+            let mut buf = vec![];
+            write_varu32(&mut buf, n);
+            let (got, nread) = read_varu32(&buf);
+            nread == buf.len() && got == n
+        }
+        quickcheck(p as fn(u32) -> bool);
+    }
+
+    #[test]
+    fn prop_read_write_i32() {
+        fn p(n: i32) -> bool {
+            let mut buf = vec![];
+            write_vari32(&mut buf, n);
+            let (got, nread) = read_vari32(&buf);
+            nread == buf.len() && got == n
+        }
+        quickcheck(p as fn(i32) -> bool);
+    }
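+
+    // A concrete (illustrative) round-trip of the varint encoding exercised
+    // by the property tests above: 300 encodes as [0xAC, 0x02].
+    #[test]
+    fn varu32_worked_example() {
+        let mut buf = vec![];
+        write_varu32(&mut buf, 300);
+        assert_eq!(buf, vec![0xAC, 0x02]);
+        assert_eq!(read_varu32(&buf), (300, 2));
+    }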
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..1c32c85
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,71 @@
+use std::fmt;
+use std::iter::repeat;
+
+/// An error that occurred during parsing or compiling a regular expression.
+#[derive(Clone, PartialEq)]
+pub enum Error {
+    /// A syntax error.
+    Syntax(String),
+    /// The compiled program exceeded the set size limit.
+    /// The argument is the size limit imposed.
+    CompiledTooBig(usize),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
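+
+// Because of `__Nonexhaustive`, callers should match with a wildcard arm,
+// e.g. (illustrative sketch):
+//
+//     match err {
+//         regex::Error::Syntax(ref msg) => eprintln!("bad pattern: {}", msg),
+//         regex::Error::CompiledTooBig(limit) => eprintln!("limit: {}", limit),
+//         _ => eprintln!("other regex error"),
+//     }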
+
+impl ::std::error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        match *self {
+            Error::Syntax(ref err) => err,
+            Error::CompiledTooBig(_) => "compiled program too big",
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => err.fmt(f),
+            Error::CompiledTooBig(limit) => write!(
+                f,
+                "Compiled regex exceeds size limit of {} bytes.",
+                limit
+            ),
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+// We implement our own Debug implementation so that we show nicer syntax
+// errors when people use `Regex::new(...).unwrap()`. It's a little weird,
+// but the `Syntax` variant is already storing a `String` anyway, so we might
+// as well format it nicely.
+impl fmt::Debug for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => {
+                let hr: String = repeat('~').take(79).collect();
+                writeln!(f, "Syntax(")?;
+                writeln!(f, "{}", hr)?;
+                writeln!(f, "{}", err)?;
+                writeln!(f, "{}", hr)?;
+                write!(f, ")")?;
+                Ok(())
+            }
+            Error::CompiledTooBig(limit) => {
+                f.debug_tuple("CompiledTooBig").field(&limit).finish()
+            }
+            Error::__Nonexhaustive => {
+                f.debug_tuple("__Nonexhaustive").finish()
+            }
+        }
+    }
+}
diff --git a/src/exec.rs b/src/exec.rs
new file mode 100644
index 0000000..acca2dc
--- /dev/null
+++ b/src/exec.rs
@@ -0,0 +1,1632 @@
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+#[cfg(feature = "perf-literal")]
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
+use syntax::hir::literal::Literals;
+use syntax::hir::Hir;
+use syntax::ParserBuilder;
+
+use backtrack;
+use cache::{Cached, CachedGuard};
+use compile::Compiler;
+#[cfg(feature = "perf-dfa")]
+use dfa;
+use error::Error;
+use input::{ByteInput, CharInput};
+use literal::LiteralSearcher;
+use pikevm;
+use prog::Program;
+use re_builder::RegexOptions;
+use re_bytes;
+use re_set;
+use re_trait::{Locations, RegularExpression, Slot};
+use re_unicode;
+use utf8::next_utf8;
+
+/// `Exec` manages the execution of a regular expression.
+///
+/// In particular, this manages the various compiled forms of a single regular
+/// expression and the choice of which matching engine to use to execute a
+/// regular expression.
+pub struct Exec {
+    /// All read only state.
+    ro: Arc<ExecReadOnly>,
+    /// Caches for the various matching engines.
+    cache: Cached<ProgramCache>,
+}
+
+/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
+/// means it is no longer Sync, but we can now avoid the overhead of
+/// synchronization to fetch the cache.
+#[derive(Debug)]
+pub struct ExecNoSync<'c> {
+    /// All read only state.
+    ro: &'c Arc<ExecReadOnly>,
+    /// Caches for the various matching engines.
+    cache: CachedGuard<'c, ProgramCache>,
+}
+
+/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
+pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
+
+/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
+/// state is determined at compile time and never changes during search.
+#[derive(Debug)]
+struct ExecReadOnly {
+    /// The original regular expressions given by the caller to compile.
+    res: Vec<String>,
+    /// A compiled program that is used in the NFA simulation and backtracking.
+    /// It can be byte-based or Unicode codepoint based.
+    ///
+    /// N.B. It is not possible to make this byte-based from the public API.
+    /// It is only used for testing byte based programs in the NFA simulations.
+    nfa: Program,
+    /// A compiled byte based program for DFA execution. This is only used
+    /// if a DFA can be executed. (Currently, only word boundary assertions are
+    /// not supported.) Note that this program contains an embedded `.*?`
+    /// preceding the first capture group, unless the regex is anchored at the
+    /// beginning.
+    dfa: Program,
+    /// The same as above, except the program is reversed (and there is no
+    /// preceding `.*?`). This is used by the DFA to find the starting location
+    /// of matches.
+    dfa_reverse: Program,
+    /// A set of suffix literals extracted from the regex.
+    ///
+    /// Prefix literals are stored on the `Program`, since they are used inside
+    /// the matching engines.
+    suffixes: LiteralSearcher,
+    /// An Aho-Corasick automaton with leftmost-first match semantics.
+    ///
+    /// This is only set when the entire regex is a simple unanchored
+    /// alternation of literals. We could probably use it in more circumstances,
+    /// but this is already hacky enough in this architecture.
+    ///
+    /// N.B. We use u32 as a state ID representation under the assumption that
+    /// if we were to exhaust the ID space, we probably would have long
+    /// surpassed the compilation size limit.
+    #[cfg(feature = "perf-literal")]
+    ac: Option<AhoCorasick<u32>>,
+    /// match_type encodes as much upfront knowledge about how we're going to
+    /// execute a search as possible.
+    match_type: MatchType,
+}
+
+/// Facilitates the construction of an executor by exposing various knobs
+/// to control how a regex is executed and what kinds of resources it's
+/// permitted to use.
+pub struct ExecBuilder {
+    options: RegexOptions,
+    match_type: Option<MatchType>,
+    bytes: bool,
+    only_utf8: bool,
+}
+
+/// Parsed represents a set of parsed regular expressions and their detected
+/// literals.
+struct Parsed {
+    exprs: Vec<Hir>,
+    prefixes: Literals,
+    suffixes: Literals,
+    bytes: bool,
+}
+
+impl ExecBuilder {
+    /// Create a regex execution builder.
+    ///
+    /// This uses default settings for everything except the regex itself,
+    /// which must be provided. Further knobs can be set by calling methods,
+    /// and then finally, `build` to actually create the executor.
+    pub fn new(re: &str) -> Self {
+        Self::new_many(&[re])
+    }
+
+    /// Like new, but compiles the union of the given regular expressions.
+    ///
+    /// Note that when compiling 2 or more regular expressions, capture groups
+    /// are completely unsupported. (This means both `find` and `captures`
+    /// won't work.)
+    pub fn new_many<I, S>(res: I) -> Self
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        let mut opts = RegexOptions::default();
+        opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect();
+        Self::new_options(opts)
+    }
+
+    /// Create a regex execution builder.
+    pub fn new_options(opts: RegexOptions) -> Self {
+        ExecBuilder {
+            options: opts,
+            match_type: None,
+            bytes: false,
+            only_utf8: true,
+        }
+    }
+
+    /// Set the matching engine to be automatically determined.
+    ///
+    /// This is the default state and will apply whatever optimizations are
+    /// possible, such as running a DFA.
+    ///
+    /// This overrides whatever was previously set via the `nfa` or
+    /// `bounded_backtracking` methods.
+    pub fn automatic(mut self) -> Self {
+        self.match_type = None;
+        self
+    }
+
+    /// Sets the matching engine to use the NFA algorithm no matter what
+    /// optimizations are possible.
+    ///
+    /// This overrides whatever was previously set via the `automatic` or
+    /// `bounded_backtracking` methods.
+    pub fn nfa(mut self) -> Self {
+        self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM));
+        self
+    }
+
+    /// Sets the matching engine to use a bounded backtracking engine no
+    /// matter what optimizations are possible.
+    ///
+    /// One must use this with care, since the bounded backtracking engine
+    /// uses memory proportional to `len(regex) * len(text)`.
+    ///
+    /// This overrides whatever was previously set via the `automatic` or
+    /// `nfa` methods.
+    pub fn bounded_backtracking(mut self) -> Self {
+        self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack));
+        self
+    }
+
+    /// Compiles byte based programs for use with the NFA matching engines.
+    ///
+    /// By default, the NFA engines match on Unicode scalar values. They can
+    /// be made to use byte based programs instead. In general, the byte based
+    /// programs are slower because of a less efficient encoding of character
+    /// classes.
+    ///
+    /// Note that this does not impact DFA matching engines, which always
+    /// execute on bytes.
+    pub fn bytes(mut self, yes: bool) -> Self {
+        self.bytes = yes;
+        self
+    }
+
+    /// When disabled, the program compiled may match arbitrary bytes.
+    ///
+    /// When enabled (the default), all compiled programs exclusively match
+    /// valid UTF-8 bytes.
+    pub fn only_utf8(mut self, yes: bool) -> Self {
+        self.only_utf8 = yes;
+        self
+    }
+
+    /// Set the Unicode flag.
+    pub fn unicode(mut self, yes: bool) -> Self {
+        self.options.unicode = yes;
+        self
+    }
+
+    /// Parse the current set of patterns into their AST and extract literals.
+    fn parse(&self) -> Result<Parsed, Error> {
+        let mut exprs = Vec::with_capacity(self.options.pats.len());
+        let mut prefixes = Some(Literals::empty());
+        let mut suffixes = Some(Literals::empty());
+        let mut bytes = false;
+        let is_set = self.options.pats.len() > 1;
+        // If we're compiling a regex set and that set has any anchored
+        // expressions, then disable all literal optimizations.
+        for pat in &self.options.pats {
+            let mut parser = ParserBuilder::new()
+                .octal(self.options.octal)
+                .case_insensitive(self.options.case_insensitive)
+                .multi_line(self.options.multi_line)
+                .dot_matches_new_line(self.options.dot_matches_new_line)
+                .swap_greed(self.options.swap_greed)
+                .ignore_whitespace(self.options.ignore_whitespace)
+                .unicode(self.options.unicode)
+                .allow_invalid_utf8(!self.only_utf8)
+                .nest_limit(self.options.nest_limit)
+                .build();
+            let expr =
+                parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?;
+            bytes = bytes || !expr.is_always_utf8();
+
+            if cfg!(feature = "perf-literal") {
+                if !expr.is_anchored_start() && expr.is_any_anchored_start() {
+                    // Partial anchors unfortunately make it hard to use
+                    // prefixes, so disable them.
+                    prefixes = None;
+                } else if is_set && expr.is_anchored_start() {
+                    // Regex sets with anchors do not go well with literal
+                    // optimizations.
+                    prefixes = None;
+                }
+                prefixes = prefixes.and_then(|mut prefixes| {
+                    if !prefixes.union_prefixes(&expr) {
+                        None
+                    } else {
+                        Some(prefixes)
+                    }
+                });
+
+                if !expr.is_anchored_end() && expr.is_any_anchored_end() {
+                    // Partial anchors unfortunately make it hard to use
+                    // suffixes, so disable them.
+                    suffixes = None;
+                } else if is_set && expr.is_anchored_end() {
+                    // Regex sets with anchors do not go well with literal
+                    // optimizations.
+                    suffixes = None;
+                }
+                suffixes = suffixes.and_then(|mut suffixes| {
+                    if !suffixes.union_suffixes(&expr) {
+                        None
+                    } else {
+                        Some(suffixes)
+                    }
+                });
+            }
+            exprs.push(expr);
+        }
+        Ok(Parsed {
+            exprs: exprs,
+            prefixes: prefixes.unwrap_or_else(Literals::empty),
+            suffixes: suffixes.unwrap_or_else(Literals::empty),
+            bytes: bytes,
+        })
+    }
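+
+    // For example (illustrative): `foo|bar` yields the prefix literals
+    // {"foo", "bar"}, while a partially anchored pattern such as `^foo|bar`
+    // trips the `is_any_anchored_start` check above and disables prefix
+    // literals entirely.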
+
+    /// Build an executor that can run a regular expression.
+    pub fn build(self) -> Result<Exec, Error> {
+        // Special case when we have no patterns to compile.
+        // This can happen when compiling a regex set.
+        if self.options.pats.is_empty() {
+            let ro = Arc::new(ExecReadOnly {
+                res: vec![],
+                nfa: Program::new(),
+                dfa: Program::new(),
+                dfa_reverse: Program::new(),
+                suffixes: LiteralSearcher::empty(),
+                #[cfg(feature = "perf-literal")]
+                ac: None,
+                match_type: MatchType::Nothing,
+            });
+            return Ok(Exec { ro: ro, cache: Cached::new() });
+        }
+        let parsed = self.parse()?;
+        let mut nfa = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .bytes(self.bytes || parsed.bytes)
+            .only_utf8(self.only_utf8)
+            .compile(&parsed.exprs)?;
+        let mut dfa = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .dfa(true)
+            .only_utf8(self.only_utf8)
+            .compile(&parsed.exprs)?;
+        let mut dfa_reverse = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .dfa(true)
+            .only_utf8(self.only_utf8)
+            .reverse(true)
+            .compile(&parsed.exprs)?;
+
+        #[cfg(feature = "perf-literal")]
+        let ac = self.build_aho_corasick(&parsed);
+        nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
+        dfa.prefixes = nfa.prefixes.clone();
+        dfa.dfa_size_limit = self.options.dfa_size_limit;
+        dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
+
+        let mut ro = ExecReadOnly {
+            res: self.options.pats,
+            nfa: nfa,
+            dfa: dfa,
+            dfa_reverse: dfa_reverse,
+            suffixes: LiteralSearcher::suffixes(parsed.suffixes),
+            #[cfg(feature = "perf-literal")]
+            ac: ac,
+            match_type: MatchType::Nothing,
+        };
+        ro.match_type = ro.choose_match_type(self.match_type);
+
+        let ro = Arc::new(ro);
+        Ok(Exec { ro: ro, cache: Cached::new() })
+    }
+
+    #[cfg(feature = "perf-literal")]
+    fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
+        if parsed.exprs.len() != 1 {
+            return None;
+        }
+        let lits = match alternation_literals(&parsed.exprs[0]) {
+            None => return None,
+            Some(lits) => lits,
+        };
+        // If we have a small number of literals, then let Teddy handle
+        // things (see literal/mod.rs).
+        if lits.len() <= 32 {
+            return None;
+        }
+        Some(
+            AhoCorasickBuilder::new()
+                .match_kind(MatchKind::LeftmostFirst)
+                .auto_configure(&lits)
+                // We always want this to reduce size, regardless
+                // of what auto-configure does.
+                .byte_classes(true)
+                .build_with_size::<u32, _, _>(&lits)
+                // This should never happen because we'd long exceed the
+                // compilation limit for regexes first.
+                .expect("AC automaton too big"),
+        )
+    }
+}
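+
+// A minimal usage sketch of the builder above (internal API, illustrative
+// only):
+//
+//     let exec = ExecBuilder::new(r"foo|bar").bounded_backtracking().build()?;
+//
+// `build` compiles an NFA program plus forward and reverse DFA programs from
+// the same parsed expressions, wires in the extracted literals, and then
+// chooses a match type for the executor.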
+
+impl<'c> RegularExpression for ExecNoSyncStr<'c> {
+    type Text = str;
+
+    fn slots_len(&self) -> usize {
+        self.0.slots_len()
+    }
+
+    fn next_after_empty(&self, text: &str, i: usize) -> usize {
+        next_utf8(text.as_bytes(), i)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_match_at(&self, text: &str, start: usize) -> Option<usize> {
+        self.0.shortest_match_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match_at(&self, text: &str, start: usize) -> bool {
+        self.0.is_match_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
+        self.0.find_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &str,
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        self.0.captures_read_at(locs, text.as_bytes(), start)
+    }
+}
+
+impl<'c> RegularExpression for ExecNoSync<'c> {
+    type Text = [u8];
+
+    /// Returns the number of capture slots in the regular expression. (There
+    /// are two slots for every capture group, corresponding to possibly empty
+    /// start and end locations of the capture.)
+    fn slots_len(&self) -> usize {
+        self.ro.nfa.captures.len() * 2
+    }
+
+    fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
+        i + 1
+    }
+
+    /// Returns the end of a match location, possibly occurring before the
+    /// end location of the correct leftmost-first match.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_match_at(&self, text: &[u8], start: usize) -> Option<usize> {
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).map(|(_, e)| e)
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa | MatchType::DfaMany => {
+                match self.shortest_dfa(text, start) {
+                    dfa::Result::Match(end) => Some(end),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match dfa::Fsm::reverse(
+                    &self.ro.dfa_reverse,
+                    self.cache.value(),
+                    true,
+                    &text[start..],
+                    text.len(),
+                ) {
+                    dfa::Result::Match(_) => Some(text.len()),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.shortest_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match(e) => Some(e),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start),
+            MatchType::Nothing => None,
+        }
+    }
+
+    /// Returns true if and only if the regex matches text.
+    ///
+    /// For single regular expressions, this is equivalent to calling
+    /// shortest_match(...).is_some().
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match_at(&self, text: &[u8], start: usize) -> bool {
+        if !self.is_anchor_end_match(text) {
+            return false;
+        }
+        // We need to do this dance because shortest_match relies on the NFA
+        // filling in captures[1], but a RegexSet has no captures. In other
+        // words, a RegexSet can't (currently) use shortest_match. ---AG
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).is_some()
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa | MatchType::DfaMany => {
+                match self.shortest_dfa(text, start) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match dfa::Fsm::reverse(
+                    &self.ro.dfa_reverse,
+                    self.cache.value(),
+                    true,
+                    &text[start..],
+                    text.len(),
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.shortest_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start),
+            MatchType::Nothing => false,
+        }
+    }
+
+    /// Finds the start and end location of the leftmost-first match, starting
+    /// at the given location.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> {
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => self.find_literals(ty, text, start),
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa => match self.find_dfa_forward(text, start) {
+                dfa::Result::Match((s, e)) => Some((s, e)),
+                dfa::Result::NoMatch(_) => None,
+                dfa::Result::Quit => {
+                    self.find_nfa(MatchNfaType::Auto, text, start)
+                }
+            },
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match self.find_dfa_anchored_reverse(text, start) {
+                    dfa::Result::Match((s, e)) => Some((s, e)),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => {
+                        self.find_nfa(MatchNfaType::Auto, text, start)
+                    }
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.find_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match((s, e)) => Some((s, e)),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => {
+                        self.find_nfa(MatchNfaType::Auto, text, start)
+                    }
+                }
+            }
+            MatchType::Nfa(ty) => self.find_nfa(ty, text, start),
+            MatchType::Nothing => None,
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaMany => {
+                unreachable!("BUG: RegexSet cannot be used with find")
+            }
+        }
+    }
+
+    /// Finds the start and end location of the leftmost-first match and also
+    /// fills in all matching capture groups.
+    ///
+    /// The number of capture slots given should be equal to the total number
+    /// of capture slots in the compiled program.
+    ///
+    /// Note that the first two slots always correspond to the start and end
+    /// locations of the overall match.
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        let slots = locs.as_slots();
+        for slot in slots.iter_mut() {
+            *slot = None;
+        }
+        // If the caller unnecessarily uses this, then we try to save them
+        // from themselves.
+        match slots.len() {
+            0 => return self.find_at(text, start),
+            2 => {
+                return self.find_at(text, start).map(|(s, e)| {
+                    slots[0] = Some(s);
+                    slots[1] = Some(e);
+                    (s, e)
+                });
+            }
+            _ => {} // fallthrough
+        }
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).and_then(|(s, e)| {
+                    self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    )
+                })
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa => {
+                if self.ro.nfa.is_anchored_start {
+                    self.captures_nfa(slots, text, start)
+                } else {
+                    match self.find_dfa_forward(text, start) {
+                        dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                            MatchNfaType::Auto,
+                            slots,
+                            text,
+                            s,
+                            e,
+                        ),
+                        dfa::Result::NoMatch(_) => None,
+                        dfa::Result::Quit => {
+                            self.captures_nfa(slots, text, start)
+                        }
+                    }
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match self.find_dfa_anchored_reverse(text, start) {
+                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    ),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.find_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    ),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
+                }
+            }
+            MatchType::Nfa(ty) => {
+                self.captures_nfa_type(ty, slots, text, start, text.len())
+            }
+            MatchType::Nothing => None,
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaMany => {
+                unreachable!("BUG: RegexSet cannot be used with captures")
+            }
+        }
+    }
+}
+
+impl<'c> ExecNoSync<'c> {
+    /// Finds the leftmost-first match using only literal search.
+    #[cfg(feature = "perf-literal")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_literals(
+        &self,
+        ty: MatchLiteralType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        use self::MatchLiteralType::*;
+        match ty {
+            Unanchored => {
+                let lits = &self.ro.nfa.prefixes;
+                lits.find(&text[start..]).map(|(s, e)| (start + s, start + e))
+            }
+            AnchoredStart => {
+                let lits = &self.ro.nfa.prefixes;
+                if start == 0 || !self.ro.nfa.is_anchored_start {
+                    lits.find_start(&text[start..])
+                        .map(|(s, e)| (start + s, start + e))
+                } else {
+                    None
+                }
+            }
+            AnchoredEnd => {
+                let lits = &self.ro.suffixes;
+                lits.find_end(&text[start..])
+                    .map(|(s, e)| (start + s, start + e))
+            }
+            AhoCorasick => self
+                .ro
+                .ac
+                .as_ref()
+                .unwrap()
+                .find(&text[start..])
+                .map(|m| (start + m.start(), start + m.end())),
+        }
+    }
+
+    /// Finds the leftmost-first match (start and end) using only the DFA.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_forward(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+        let end = match dfa::Fsm::forward(
+            &self.ro.dfa,
+            self.cache.value(),
+            false,
+            text,
+            start,
+        ) {
+            NoMatch(i) => return NoMatch(i),
+            Quit => return Quit,
+            Match(end) if start == end => return Match((start, start)),
+            Match(end) => end,
+        };
+        // Now run the DFA in reverse to find the start of the match.
+        match dfa::Fsm::reverse(
+            &self.ro.dfa_reverse,
+            self.cache.value(),
+            false,
+            &text[start..],
+            end - start,
+        ) {
+            Match(s) => Match((start + s, end)),
+            NoMatch(i) => NoMatch(i),
+            Quit => Quit,
+        }
+    }
+
+    /// Finds the leftmost-first match (start and end) using only the DFA,
+    /// but assumes the regex is anchored at the end and therefore starts at
+    /// the end of the regex and matches in reverse.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_anchored_reverse(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+        match dfa::Fsm::reverse(
+            &self.ro.dfa_reverse,
+            self.cache.value(),
+            false,
+            &text[start..],
+            text.len() - start,
+        ) {
+            Match(s) => Match((start + s, text.len())),
+            NoMatch(i) => NoMatch(i),
+            Quit => Quit,
+        }
+    }
+
+    /// Finds the end of the shortest match using only the DFA.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
+        dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
+    }
+
+    /// Finds the end of the shortest match using only the DFA by scanning for
+    /// suffix literals.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<usize> {
+        match self.exec_dfa_reverse_suffix(text, start) {
+            None => self.shortest_dfa(text, start),
+            Some(r) => r.map(|(_, end)| end),
+        }
+    }
+
+    /// Finds the end of the shortest match using only the DFA by scanning for
+    /// suffix literals. It also reports the start of the match.
+    ///
+    /// Note that if None is returned, then the optimization gave up to avoid
+    /// worst case quadratic behavior. A forward scanning DFA should be tried
+    /// next.
+    ///
+    /// If a match is returned and the full leftmost-first match is desired,
+    /// then a forward scan starting from the beginning of the match must be
+    /// done.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn exec_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        original_start: usize,
+    ) -> Option<dfa::Result<(usize, usize)>> {
+        use dfa::Result::*;
+
+        let lcs = self.ro.suffixes.lcs();
+        debug_assert!(lcs.len() >= 1);
+        let mut start = original_start;
+        let mut end = start;
+        let mut last_literal = start;
+        while end <= text.len() {
+            last_literal += match lcs.find(&text[last_literal..]) {
+                None => return Some(NoMatch(text.len())),
+                Some(i) => i,
+            };
+            end = last_literal + lcs.len();
+            match dfa::Fsm::reverse(
+                &self.ro.dfa_reverse,
+                self.cache.value(),
+                false,
+                &text[start..end],
+                end - start,
+            ) {
+                Match(0) | NoMatch(0) => return None,
+                Match(i) => return Some(Match((start + i, end))),
+                NoMatch(i) => {
+                    start += i;
+                    last_literal += 1;
+                    continue;
+                }
+                Quit => return Some(Quit),
+            };
+        }
+        Some(NoMatch(text.len()))
+    }
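+
+    // Illustrative example: for a pattern ending in a literal, say
+    // `[a-z]+ing`, the longest common suffix is `ing`. The loop above
+    // repeatedly jumps to the next occurrence of `ing`, runs the reverse DFA
+    // from just past it back towards `start`, and reports the match start on
+    // success. Returning `None` when the reverse scan runs all the way back
+    // to `start` is what guards against the quadratic worst case mentioned
+    // in the doc comment.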
+
+    /// Finds the leftmost-first match (start and end) using only the DFA
+    /// by scanning for suffix literals.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+
+        let match_start = match self.exec_dfa_reverse_suffix(text, start) {
+            None => return self.find_dfa_forward(text, start),
+            Some(Match((start, _))) => start,
+            Some(r) => return r,
+        };
+        // At this point, we've found a match. The only way to quit now
+        // without a match is if the DFA gives up (seems unlikely).
+        //
+        // Now run the DFA forwards to find the proper end of the match.
+        // (The suffix literal match can only indicate the earliest
+        // possible end location, which may appear before the end of the
+        // leftmost-first match.)
+        match dfa::Fsm::forward(
+            &self.ro.dfa,
+            self.cache.value(),
+            false,
+            text,
+            match_start,
+        ) {
+            NoMatch(_) => panic!("BUG: reverse match implies forward match"),
+            Quit => Quit,
+            Match(e) => Match((match_start, e)),
+        }
+    }
+
+    /// Executes the NFA engine to return whether there is a match or not.
+    ///
+    /// Ideally, we could use shortest_nfa(...).is_some() and get the same
+    /// performance characteristics, but regex sets don't have captures, which
+    /// shortest_nfa depends on.
+    #[cfg(feature = "perf-dfa")]
+    fn match_nfa(&self, text: &[u8], start: usize) -> bool {
+        self.match_nfa_type(MatchNfaType::Auto, text, start)
+    }
+
+    /// Like match_nfa, but allows specification of the type of NFA engine.
+    fn match_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> bool {
+        self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut [],
+            true,
+            false,
+            text,
+            start,
+            text.len(),
+        )
+    }
+
+    /// Finds the shortest match using an NFA.
+    #[cfg(feature = "perf-dfa")]
+    fn shortest_nfa(&self, text: &[u8], start: usize) -> Option<usize> {
+        self.shortest_nfa_type(MatchNfaType::Auto, text, start)
+    }
+
+    /// Like shortest_nfa, but allows specification of the type of NFA engine.
+    fn shortest_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<usize> {
+        let mut slots = [None, None];
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut slots,
+            true,
+            true,
+            text,
+            start,
+            text.len(),
+        ) {
+            slots[1]
+        } else {
+            None
+        }
+    }
+
+    /// Like find, but executes an NFA engine.
+    fn find_nfa(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        let mut slots = [None, None];
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut slots,
+            false,
+            false,
+            text,
+            start,
+            text.len(),
+        ) {
+            match (slots[0], slots[1]) {
+                (Some(s), Some(e)) => Some((s, e)),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Like find_nfa, but fills in captures.
+    ///
+    /// `slots` should have length equal to `2 * nfa.captures.len()`.
+    #[cfg(feature = "perf-dfa")]
+    fn captures_nfa(
+        &self,
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        self.captures_nfa_type(
+            MatchNfaType::Auto,
+            slots,
+            text,
+            start,
+            text.len(),
+        )
+    }
+
+    /// Like captures_nfa, but allows specification of type of NFA engine.
+    fn captures_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Option<(usize, usize)> {
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            slots,
+            false,
+            false,
+            text,
+            start,
+            end,
+        ) {
+            match (slots[0], slots[1]) {
+                (Some(s), Some(e)) => Some((s, e)),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+
+    fn exec_nfa(
+        &self,
+        mut ty: MatchNfaType,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        quit_after_match_with_pos: bool,
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        use self::MatchNfaType::*;
+        if let Auto = ty {
+            if backtrack::should_exec(self.ro.nfa.len(), text.len()) {
+                ty = Backtrack;
+            } else {
+                ty = PikeVM;
+            }
+        }
+        // The backtracker can't return the shortest match position as it is
+        // implemented today. So if someone calls `shortest_match` and we need
+        // to run an NFA, then use the PikeVM.
+        if quit_after_match_with_pos || ty == PikeVM {
+            self.exec_pikevm(
+                matches,
+                slots,
+                quit_after_match,
+                text,
+                start,
+                end,
+            )
+        } else {
+            self.exec_backtrack(matches, slots, text, start, end)
+        }
+    }
+
+    /// Always runs the Pike VM NFA engine.
+    fn exec_pikevm(
+        &self,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        if self.ro.nfa.uses_bytes() {
+            pikevm::Fsm::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                quit_after_match,
+                ByteInput::new(text, self.ro.nfa.only_utf8),
+                start,
+                end,
+            )
+        } else {
+            pikevm::Fsm::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                quit_after_match,
+                CharInput::new(text),
+                start,
+                end,
+            )
+        }
+    }
+
+    /// Always runs the NFA using bounded backtracking.
+    fn exec_backtrack(
+        &self,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        if self.ro.nfa.uses_bytes() {
+            backtrack::Bounded::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                ByteInput::new(text, self.ro.nfa.only_utf8),
+                start,
+                end,
+            )
+        } else {
+            backtrack::Bounded::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                CharInput::new(text),
+                start,
+                end,
+            )
+        }
+    }
+
+    /// Finds which regular expressions match the given text.
+    ///
+    /// `matches` should have length equal to the number of regexes being
+    /// searched.
+    ///
+    /// This is only useful when one wants to know which regexes in a set
+    /// match some text.
+    pub fn many_matches_at(
+        &self,
+        matches: &mut [bool],
+        text: &[u8],
+        start: usize,
+    ) -> bool {
+        use self::MatchType::*;
+        if !self.is_anchor_end_match(text) {
+            return false;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            Literal(ty) => {
+                debug_assert_eq!(matches.len(), 1);
+                matches[0] = self.find_literals(ty, text, start).is_some();
+                matches[0]
+            }
+            #[cfg(feature = "perf-dfa")]
+            Dfa | DfaAnchoredReverse | DfaMany => {
+                match dfa::Fsm::forward_many(
+                    &self.ro.dfa,
+                    self.cache.value(),
+                    matches,
+                    text,
+                    start,
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.exec_nfa(
+                        MatchNfaType::Auto,
+                        matches,
+                        &mut [],
+                        false,
+                        false,
+                        text,
+                        start,
+                        text.len(),
+                    ),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            DfaSuffix => {
+                match dfa::Fsm::forward_many(
+                    &self.ro.dfa,
+                    self.cache.value(),
+                    matches,
+                    text,
+                    start,
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.exec_nfa(
+                        MatchNfaType::Auto,
+                        matches,
+                        &mut [],
+                        false,
+                        false,
+                        text,
+                        start,
+                        text.len(),
+                    ),
+                }
+            }
+            Nfa(ty) => self.exec_nfa(
+                ty,
+                matches,
+                &mut [],
+                false,
+                false,
+                text,
+                start,
+                text.len(),
+            ),
+            Nothing => false,
+        }
+    }
+
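+    /// Quick rejection check: returns false when the regex is anchored at
+    /// the end, the haystack is large (>1MB) and the common literal suffix
+    /// does not appear at the end of the haystack. Otherwise, returns true.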
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_anchor_end_match(&self, text: &[u8]) -> bool {
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &ExecReadOnly, _: &[u8]) -> bool {
+            true
+        }
+
+        #[cfg(feature = "perf-literal")]
+        fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
+            // Only do this check if the haystack is big (>1MB).
+            if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
+                let lcs = ro.suffixes.lcs();
+                if lcs.len() >= 1 && !lcs.is_suffix(text) {
+                    return false;
+                }
+            }
+            true
+        }
+
+        imp(&self.ro, text)
+    }
+
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        &self.ro.nfa.capture_name_idx
+    }
+}
+
+impl<'c> ExecNoSyncStr<'c> {
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        self.0.capture_name_idx()
+    }
+}
+
+impl Exec {
+    /// Get a searcher that isn't Sync.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn searcher(&self) -> ExecNoSync {
+        let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
+        ExecNoSync {
+            ro: &self.ro, // a clone is too expensive here! (and not needed)
+            cache: self.cache.get_or(create),
+        }
+    }
+
+    /// Get a searcher that isn't Sync and can match on &str.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn searcher_str(&self) -> ExecNoSyncStr {
+        ExecNoSyncStr(self.searcher())
+    }
+
+    /// Build a Regex from this executor.
+    pub fn into_regex(self) -> re_unicode::Regex {
+        re_unicode::Regex::from(self)
+    }
+
+    /// Build a RegexSet from this executor.
+    pub fn into_regex_set(self) -> re_set::unicode::RegexSet {
+        re_set::unicode::RegexSet::from(self)
+    }
+
+    /// Build a Regex from this executor that can match arbitrary bytes.
+    pub fn into_byte_regex(self) -> re_bytes::Regex {
+        re_bytes::Regex::from(self)
+    }
+
+    /// Build a RegexSet from this executor that can match arbitrary bytes.
+    pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet {
+        re_set::bytes::RegexSet::from(self)
+    }
+
+    /// The original regular expressions given by the caller that were
+    /// compiled.
+    pub fn regex_strings(&self) -> &[String] {
+        &self.ro.res
+    }
+
+    /// Return a slice of capture names.
+    ///
+    /// Any capture that isn't named is None.
+    pub fn capture_names(&self) -> &[Option<String>] {
+        &self.ro.nfa.captures
+    }
+
+    /// Return a reference to named groups mapping (from group name to
+    /// group position).
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        &self.ro.nfa.capture_name_idx
+    }
+}
+
+impl Clone for Exec {
+    fn clone(&self) -> Exec {
+        Exec { ro: self.ro.clone(), cache: Cached::new() }
+    }
+}
+
+impl ExecReadOnly {
+    fn choose_match_type(&self, hint: Option<MatchType>) -> MatchType {
+        if let Some(MatchType::Nfa(_)) = hint {
+            return hint.unwrap();
+        }
+        // If the NFA is empty, then we'll never match anything.
+        if self.nfa.insts.is_empty() {
+            return MatchType::Nothing;
+        }
+        if let Some(literalty) = self.choose_literal_match_type() {
+            return literalty;
+        }
+        if let Some(dfaty) = self.choose_dfa_match_type() {
+            return dfaty;
+        }
+        // We're so totally hosed: nothing faster applies, so fall back to
+        // the NFA.
+        MatchType::Nfa(MatchNfaType::Auto)
+    }
+
+    /// If a plain literal scan can be used, then a corresponding literal
+    /// search type is returned.
+    fn choose_literal_match_type(&self) -> Option<MatchType> {
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
+            None
+        }
+
+        #[cfg(feature = "perf-literal")]
+        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
+            // If our set of prefixes is complete, then we can use it to find
+            // a match in lieu of a regex engine. This doesn't quite work well
+            // in the presence of multiple regexes, so only do it when there's
+            // one.
+            //
+            // TODO(burntsushi): Also, don't try to match literals if the regex
+            // is partially anchored. We could technically do it, but we'd need
+            // to create two sets of literals: all of them and then the subset
+            // that aren't anchored. We would then only search for all of them
+            // when at the beginning of the input and use the subset in all
+            // other cases.
+            if ro.res.len() != 1 {
+                return None;
+            }
+            if ro.ac.is_some() {
+                return Some(MatchType::Literal(
+                    MatchLiteralType::AhoCorasick,
+                ));
+            }
+            if ro.nfa.prefixes.complete() {
+                return if ro.nfa.is_anchored_start {
+                    Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
+                } else {
+                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
+                };
+            }
+            if ro.suffixes.complete() {
+                return if ro.nfa.is_anchored_end {
+                    Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
+                } else {
+                    // This case shouldn't happen. When the regex isn't
+                    // anchored, then complete prefixes should imply complete
+                    // suffixes.
+                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
+                };
+            }
+            None
+        }
+
+        imp(self)
+    }
+
+    /// If a DFA scan can be used, then choose the appropriate DFA strategy.
+    fn choose_dfa_match_type(&self) -> Option<MatchType> {
+        #[cfg(not(feature = "perf-dfa"))]
+        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
+            None
+        }
+
+        #[cfg(feature = "perf-dfa")]
+        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
+            if !dfa::can_exec(&ro.dfa) {
+                return None;
+            }
+            // Regex sets require a slightly specialized path.
+            if ro.res.len() >= 2 {
+                return Some(MatchType::DfaMany);
+            }
+            // If the regex is anchored at the end but not the start, then
+            // just match in reverse from the end of the haystack.
+            if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
+                return Some(MatchType::DfaAnchoredReverse);
+            }
+            #[cfg(feature = "perf-literal")]
+            {
+                // If there's a longish suffix literal, then it might be faster
+                // to look for that first.
+                if ro.should_suffix_scan() {
+                    return Some(MatchType::DfaSuffix);
+                }
+            }
+            // Fall back to your garden variety forward searching lazy DFA.
+            Some(MatchType::Dfa)
+        }
+
+        imp(self)
+    }
+
+    /// Returns true if the program is amenable to suffix scanning.
+    ///
+    /// When this is true, as a heuristic, we assume it is OK to quickly scan
+    /// for suffix literals and then do a *reverse* DFA match from any matches
+    /// produced by the literal scan. (This is then followed by a forward DFA
+    /// search, since the previously found suffix literal may not actually be
+    /// the end of a match.)
+    ///
+    /// This is a bit of a specialized optimization, but can result in pretty
+    /// big performance wins if 1) there are no prefix literals and 2) the
+    /// suffix literals are pretty rare in the text. (1) is obviously easy to
+    /// account for but (2) is harder. As a proxy, we assume that longer
+    /// strings are generally rarer, so we only enable this optimization when
+    /// we have a meaty suffix.
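+    ///
+    /// As an illustrative example, a pattern like `\w+foobar` likely has no
+    /// useful prefix literals but does share the common suffix `foobar`, so a
+    /// suffix scan may be chosen for it.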
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    fn should_suffix_scan(&self) -> bool {
+        if self.suffixes.is_empty() {
+            return false;
+        }
+        let lcs_len = self.suffixes.lcs().char_len();
+        lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+enum MatchType {
+    /// A single or multiple literal search. This is only used when the regex
+    /// can be decomposed into a literal search.
+    #[cfg(feature = "perf-literal")]
+    Literal(MatchLiteralType),
+    /// A normal DFA search.
+    #[cfg(feature = "perf-dfa")]
+    Dfa,
+    /// A reverse DFA search starting from the end of a haystack.
+    #[cfg(feature = "perf-dfa")]
+    DfaAnchoredReverse,
+    /// A reverse DFA search with suffix literal scanning.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    DfaSuffix,
+    /// Use the DFA on two or more regular expressions.
+    #[cfg(feature = "perf-dfa")]
+    DfaMany,
+    /// An NFA variant.
+    Nfa(MatchNfaType),
+    /// No match is ever possible, so don't ever try to search.
+    Nothing,
+}
+
+#[derive(Clone, Copy, Debug)]
+#[cfg(feature = "perf-literal")]
+enum MatchLiteralType {
+    /// Match literals anywhere in text.
+    Unanchored,
+    /// Match literals only at the start of text.
+    AnchoredStart,
+    /// Match literals only at the end of text.
+    AnchoredEnd,
+    /// Use an Aho-Corasick automaton. This requires `ac` to be Some on
+    /// ExecReadOnly.
+    AhoCorasick,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum MatchNfaType {
+    /// Choose between Backtrack and PikeVM.
+    Auto,
+    /// NFA bounded backtracking.
+    ///
+    /// (This is only set by tests, since it never makes sense to always want
+    /// backtracking.)
+    Backtrack,
+    /// The Pike VM.
+    ///
+    /// (This is only set by tests, since it never makes sense to always want
+    /// the Pike VM.)
+    PikeVM,
+}
+
+/// `ProgramCache` maintains reusable allocations for each matching engine
+/// available to a particular program.
+pub type ProgramCache = RefCell<ProgramCacheInner>;
+
+#[derive(Debug)]
+pub struct ProgramCacheInner {
+    pub pikevm: pikevm::Cache,
+    pub backtrack: backtrack::Cache,
+    #[cfg(feature = "perf-dfa")]
+    pub dfa: dfa::Cache,
+    #[cfg(feature = "perf-dfa")]
+    pub dfa_reverse: dfa::Cache,
+}
+
+impl ProgramCacheInner {
+    fn new(ro: &ExecReadOnly) -> Self {
+        ProgramCacheInner {
+            pikevm: pikevm::Cache::new(&ro.nfa),
+            backtrack: backtrack::Cache::new(&ro.nfa),
+            #[cfg(feature = "perf-dfa")]
+            dfa: dfa::Cache::new(&ro.dfa),
+            #[cfg(feature = "perf-dfa")]
+            dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
+        }
+    }
+}
+
+/// Checks whether the given HIR is a simple alternation of literals, and if
+/// so, returns them. Otherwise, this returns `None`.
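+///
+/// As an illustrative example, `samwise|frodo` is a plain alternation of
+/// literals and would yield `Some(vec![b"samwise".to_vec(), b"frodo".to_vec()])`,
+/// while something like `samwise|frod[oa]` is not and would yield `None`.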
+#[cfg(feature = "perf-literal")]
+fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
+    use syntax::hir::{HirKind, Literal};
+
+    // This is pretty hacky, but basically, if `is_alternation_literal` is
+    // true, then we can make several assumptions about the structure of our
+    // HIR. This is what justifies the `unreachable!` statements below.
+    //
+    // This code should be refactored once we overhaul this crate's
+    // optimization pipeline, because this is a terribly inflexible way to go
+    // about things.
+
+    if !expr.is_alternation_literal() {
+        return None;
+    }
+    let alts = match *expr.kind() {
+        HirKind::Alternation(ref alts) => alts,
+        _ => return None, // one literal isn't worth it
+    };
+
+    let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
+        Literal::Unicode(c) => {
+            let mut buf = [0; 4];
+            dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
+        }
+        Literal::Byte(b) => {
+            dst.push(b);
+        }
+    };
+
+    let mut lits = vec![];
+    for alt in alts {
+        let mut lit = vec![];
+        match *alt.kind() {
+            HirKind::Literal(ref x) => extendlit(x, &mut lit),
+            HirKind::Concat(ref exprs) => {
+                for e in exprs {
+                    match *e.kind() {
+                        HirKind::Literal(ref x) => extendlit(x, &mut lit),
+                        _ => unreachable!("expected literal, got {:?}", e),
+                    }
+                }
+            }
+            _ => unreachable!("expected literal or concat, got {:?}", alt),
+        }
+        lits.push(lit);
+    }
+    Some(lits)
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new("^S")
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new("^S")
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = vec![83, 83];
+
+        let s1 = backtrack_bytes_re.split(&input);
+        let s2 = default_bytes_re.split(&input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
+
+    #[test]
+    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = "**";
+
+        let s1 = backtrack_bytes_re.split(input);
+        let s2 = default_bytes_re.split(input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
+}
diff --git a/src/expand.rs b/src/expand.rs
new file mode 100644
index 0000000..528f55e
--- /dev/null
+++ b/src/expand.rs
@@ -0,0 +1,220 @@
+use std::str;
+
+use find_byte::find_byte;
+
+use re_bytes;
+use re_unicode;
+
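+/// Expands `replacement` into `dst`, substituting references to capture
+/// groups in `caps`.
+///
+/// `$$` expands to a literal `$`, `$name`/`${name}` and `$1`/`${1}` expand to
+/// the corresponding capture (or to the empty string if the group did not
+/// participate in the match), and a `$` that is not followed by a valid
+/// reference is copied through unchanged.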
+pub fn expand_str(
+    caps: &re_unicode::Captures,
+    mut replacement: &str,
+    dst: &mut String,
+) {
+    while !replacement.is_empty() {
+        match find_byte(b'$', replacement.as_bytes()) {
+            None => break,
+            Some(i) => {
+                dst.push_str(&replacement[..i]);
+                replacement = &replacement[i..];
+            }
+        }
+        if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
+            dst.push_str("$");
+            replacement = &replacement[2..];
+            continue;
+        }
+        debug_assert!(!replacement.is_empty());
+        let cap_ref = match find_cap_ref(replacement) {
+            Some(cap_ref) => cap_ref,
+            None => {
+                dst.push_str("$");
+                replacement = &replacement[1..];
+                continue;
+            }
+        };
+        replacement = &replacement[cap_ref.end..];
+        match cap_ref.cap {
+            Ref::Number(i) => {
+                dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or(""));
+            }
+            Ref::Named(name) => {
+                dst.push_str(
+                    caps.name(name).map(|m| m.as_str()).unwrap_or(""),
+                );
+            }
+        }
+    }
+    dst.push_str(replacement);
+}
+
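+/// Like `expand_str`, but operates on raw bytes instead of `&str`.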
+pub fn expand_bytes(
+    caps: &re_bytes::Captures,
+    mut replacement: &[u8],
+    dst: &mut Vec<u8>,
+) {
+    while !replacement.is_empty() {
+        match find_byte(b'$', replacement) {
+            None => break,
+            Some(i) => {
+                dst.extend(&replacement[..i]);
+                replacement = &replacement[i..];
+            }
+        }
+        if replacement.get(1).map_or(false, |&b| b == b'$') {
+            dst.push(b'$');
+            replacement = &replacement[2..];
+            continue;
+        }
+        debug_assert!(!replacement.is_empty());
+        let cap_ref = match find_cap_ref(replacement) {
+            Some(cap_ref) => cap_ref,
+            None => {
+                dst.push(b'$');
+                replacement = &replacement[1..];
+                continue;
+            }
+        };
+        replacement = &replacement[cap_ref.end..];
+        match cap_ref.cap {
+            Ref::Number(i) => {
+                dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b""));
+            }
+            Ref::Named(name) => {
+                dst.extend(
+                    caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""),
+                );
+            }
+        }
+    }
+    dst.extend(replacement);
+}
+
+/// `CaptureRef` represents a reference to a capture group inside some text.
+/// The reference is either a capture group name or a number.
+///
+/// It is also tagged with the position in the text following the
+/// capture reference.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+struct CaptureRef<'a> {
+    cap: Ref<'a>,
+    end: usize,
+}
+
+/// A reference to a capture group in some text.
+///
+/// e.g., `$2`, `$foo`, `${foo}`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum Ref<'a> {
+    Named(&'a str),
+    Number(usize),
+}
+
+impl<'a> From<&'a str> for Ref<'a> {
+    fn from(x: &'a str) -> Ref<'a> {
+        Ref::Named(x)
+    }
+}
+
+impl From<usize> for Ref<'static> {
+    fn from(x: usize) -> Ref<'static> {
+        Ref::Number(x)
+    }
+}
+
+/// Parses a possible reference to a capture group (by name or number) at the
+/// beginning of `replacement`.
+///
+/// If no valid reference is found, `None` is returned.
+fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
+    replacement: &T,
+) -> Option<CaptureRef> {
+    let mut i = 0;
+    let rep: &[u8] = replacement.as_ref();
+    if rep.len() <= 1 || rep[0] != b'$' {
+        return None;
+    }
+    let mut brace = false;
+    i += 1;
+    if rep[i] == b'{' {
+        brace = true;
+        i += 1;
+    }
+    let mut cap_end = i;
+    while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
+        cap_end += 1;
+    }
+    if cap_end == i {
+        return None;
+    }
+    // We just verified that the range 0..cap_end is valid ASCII, so it must
+    // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
+    // check with either unsafe or by parsing the number straight from &[u8].
+    let cap =
+        str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
+    if brace {
+        if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
+            return None;
+        }
+        cap_end += 1;
+    }
+    Some(CaptureRef {
+        cap: match cap.parse::<u32>() {
+            Ok(i) => Ref::Number(i as usize),
+            Err(_) => Ref::Named(cap),
+        },
+        end: cap_end,
+    })
+}
+
+/// Returns true if and only if the given byte is allowed in a capture name.
+fn is_valid_cap_letter(b: &u8) -> bool {
+    match *b {
+        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{find_cap_ref, CaptureRef};
+
+    macro_rules! find {
+        ($name:ident, $text:expr) => {
+            #[test]
+            fn $name() {
+                assert_eq!(None, find_cap_ref($text));
+            }
+        };
+        ($name:ident, $text:expr, $capref:expr) => {
+            #[test]
+            fn $name() {
+                assert_eq!(Some($capref), find_cap_ref($text));
+            }
+        };
+    }
+
+    macro_rules! c {
+        ($name_or_number:expr, $pos:expr) => {
+            CaptureRef { cap: $name_or_number.into(), end: $pos }
+        };
+    }
+
+    find!(find_cap_ref1, "$foo", c!("foo", 4));
+    find!(find_cap_ref2, "${foo}", c!("foo", 6));
+    find!(find_cap_ref3, "$0", c!(0, 2));
+    find!(find_cap_ref4, "$5", c!(5, 2));
+    find!(find_cap_ref5, "$10", c!(10, 3));
+    // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
+    find!(find_cap_ref6, "$42a", c!("42a", 4));
+    find!(find_cap_ref7, "${42}a", c!(42, 5));
+    find!(find_cap_ref8, "${42");
+    find!(find_cap_ref9, "${42 ");
+    find!(find_cap_ref10, " $0 ");
+    find!(find_cap_ref11, "$");
+    find!(find_cap_ref12, " ");
+    find!(find_cap_ref13, "");
+    find!(find_cap_ref14, "$1-$2", c!(1, 2));
+    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
+    find!(find_cap_ref16, "$x-$y", c!("x", 2));
+    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+}
diff --git a/src/find_byte.rs b/src/find_byte.rs
new file mode 100644
index 0000000..e95f72a
--- /dev/null
+++ b/src/find_byte.rs
@@ -0,0 +1,18 @@
+/// Searches for the given needle in the given haystack.
+///
+/// If the perf-literal feature is enabled, then this uses the super optimized
+/// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation.
+pub fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
+    #[cfg(not(feature = "perf-literal"))]
+    fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().position(|&b| b == needle)
+    }
+
+    #[cfg(feature = "perf-literal")]
+    fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
+        use memchr::memchr;
+        memchr(needle, haystack)
+    }
+
+    imp(needle, haystack)
+}
diff --git a/src/freqs.rs b/src/freqs.rs
new file mode 100644
index 0000000..fcffa95
--- /dev/null
+++ b/src/freqs.rs
@@ -0,0 +1,261 @@
+// NOTE: The following code was generated by "scripts/frequencies.py", do not
+// edit directly
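+//
+// Higher values correspond to bytes that are presumed to occur more
+// frequently in typical haystacks; this lets literal search code prefer
+// scanning for rarer bytes.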
+
+pub const BYTE_FREQUENCIES: [u8; 256] = [
+    55,  // '\x00'
+    52,  // '\x01'
+    51,  // '\x02'
+    50,  // '\x03'
+    49,  // '\x04'
+    48,  // '\x05'
+    47,  // '\x06'
+    46,  // '\x07'
+    45,  // '\x08'
+    103, // '\t'
+    242, // '\n'
+    66,  // '\x0b'
+    67,  // '\x0c'
+    229, // '\r'
+    44,  // '\x0e'
+    43,  // '\x0f'
+    42,  // '\x10'
+    41,  // '\x11'
+    40,  // '\x12'
+    39,  // '\x13'
+    38,  // '\x14'
+    37,  // '\x15'
+    36,  // '\x16'
+    35,  // '\x17'
+    34,  // '\x18'
+    33,  // '\x19'
+    56,  // '\x1a'
+    32,  // '\x1b'
+    31,  // '\x1c'
+    30,  // '\x1d'
+    29,  // '\x1e'
+    28,  // '\x1f'
+    255, // ' '
+    148, // '!'
+    164, // '"'
+    149, // '#'
+    136, // '$'
+    160, // '%'
+    155, // '&'
+    173, // "'"
+    221, // '('
+    222, // ')'
+    134, // '*'
+    122, // '+'
+    232, // ','
+    202, // '-'
+    215, // '.'
+    224, // '/'
+    208, // '0'
+    220, // '1'
+    204, // '2'
+    187, // '3'
+    183, // '4'
+    179, // '5'
+    177, // '6'
+    168, // '7'
+    178, // '8'
+    200, // '9'
+    226, // ':'
+    195, // ';'
+    154, // '<'
+    184, // '='
+    174, // '>'
+    126, // '?'
+    120, // '@'
+    191, // 'A'
+    157, // 'B'
+    194, // 'C'
+    170, // 'D'
+    189, // 'E'
+    162, // 'F'
+    161, // 'G'
+    150, // 'H'
+    193, // 'I'
+    142, // 'J'
+    137, // 'K'
+    171, // 'L'
+    176, // 'M'
+    185, // 'N'
+    167, // 'O'
+    186, // 'P'
+    112, // 'Q'
+    175, // 'R'
+    192, // 'S'
+    188, // 'T'
+    156, // 'U'
+    140, // 'V'
+    143, // 'W'
+    123, // 'X'
+    133, // 'Y'
+    128, // 'Z'
+    147, // '['
+    138, // '\\'
+    146, // ']'
+    114, // '^'
+    223, // '_'
+    151, // '`'
+    249, // 'a'
+    216, // 'b'
+    238, // 'c'
+    236, // 'd'
+    253, // 'e'
+    227, // 'f'
+    218, // 'g'
+    230, // 'h'
+    247, // 'i'
+    135, // 'j'
+    180, // 'k'
+    241, // 'l'
+    233, // 'm'
+    246, // 'n'
+    244, // 'o'
+    231, // 'p'
+    139, // 'q'
+    245, // 'r'
+    243, // 's'
+    251, // 't'
+    235, // 'u'
+    201, // 'v'
+    196, // 'w'
+    240, // 'x'
+    214, // 'y'
+    152, // 'z'
+    182, // '{'
+    205, // '|'
+    181, // '}'
+    127, // '~'
+    27,  // '\x7f'
+    212, // '\x80'
+    211, // '\x81'
+    210, // '\x82'
+    213, // '\x83'
+    228, // '\x84'
+    197, // '\x85'
+    169, // '\x86'
+    159, // '\x87'
+    131, // '\x88'
+    172, // '\x89'
+    105, // '\x8a'
+    80,  // '\x8b'
+    98,  // '\x8c'
+    96,  // '\x8d'
+    97,  // '\x8e'
+    81,  // '\x8f'
+    207, // '\x90'
+    145, // '\x91'
+    116, // '\x92'
+    115, // '\x93'
+    144, // '\x94'
+    130, // '\x95'
+    153, // '\x96'
+    121, // '\x97'
+    107, // '\x98'
+    132, // '\x99'
+    109, // '\x9a'
+    110, // '\x9b'
+    124, // '\x9c'
+    111, // '\x9d'
+    82,  // '\x9e'
+    108, // '\x9f'
+    118, // '\xa0'
+    141, // '¡'
+    113, // '¢'
+    129, // '£'
+    119, // '¤'
+    125, // '¥'
+    165, // '¦'
+    117, // '§'
+    92,  // '¨'
+    106, // '©'
+    83,  // 'ª'
+    72,  // '«'
+    99,  // '¬'
+    93,  // '\xad'
+    65,  // '®'
+    79,  // '¯'
+    166, // '°'
+    237, // '±'
+    163, // '²'
+    199, // '³'
+    190, // '´'
+    225, // 'µ'
+    209, // '¶'
+    203, // '·'
+    198, // '¸'
+    217, // '¹'
+    219, // 'º'
+    206, // '»'
+    234, // '¼'
+    248, // '½'
+    158, // '¾'
+    239, // '¿'
+    255, // 'À'
+    255, // 'Á'
+    255, // 'Â'
+    255, // 'Ã'
+    255, // 'Ä'
+    255, // 'Å'
+    255, // 'Æ'
+    255, // 'Ç'
+    255, // 'È'
+    255, // 'É'
+    255, // 'Ê'
+    255, // 'Ë'
+    255, // 'Ì'
+    255, // 'Í'
+    255, // 'Î'
+    255, // 'Ï'
+    255, // 'Ð'
+    255, // 'Ñ'
+    255, // 'Ò'
+    255, // 'Ó'
+    255, // 'Ô'
+    255, // 'Õ'
+    255, // 'Ö'
+    255, // '×'
+    255, // 'Ø'
+    255, // 'Ù'
+    255, // 'Ú'
+    255, // 'Û'
+    255, // 'Ü'
+    255, // 'Ý'
+    255, // 'Þ'
+    255, // 'ß'
+    255, // 'à'
+    255, // 'á'
+    255, // 'â'
+    255, // 'ã'
+    255, // 'ä'
+    255, // 'å'
+    255, // 'æ'
+    255, // 'ç'
+    255, // 'è'
+    255, // 'é'
+    255, // 'ê'
+    255, // 'ë'
+    255, // 'ì'
+    255, // 'í'
+    255, // 'î'
+    255, // 'ï'
+    255, // 'ð'
+    255, // 'ñ'
+    255, // 'ò'
+    255, // 'ó'
+    255, // 'ô'
+    255, // 'õ'
+    255, // 'ö'
+    255, // '÷'
+    255, // 'ø'
+    255, // 'ù'
+    255, // 'ú'
+    255, // 'û'
+    255, // 'ü'
+    255, // 'ý'
+    255, // 'þ'
+    255, // 'ÿ'
+];
diff --git a/src/input.rs b/src/input.rs
new file mode 100644
index 0000000..3afa2d0
--- /dev/null
+++ b/src/input.rs
@@ -0,0 +1,434 @@
+use std::char;
+use std::cmp::Ordering;
+use std::fmt;
+use std::ops;
+use std::u32;
+
+use syntax;
+
+use literal::LiteralSearcher;
+use prog::InstEmptyLook;
+use utf8::{decode_last_utf8, decode_utf8};
+
+/// Represents a location in the input.
+#[derive(Clone, Copy, Debug)]
+pub struct InputAt {
+    pos: usize,
+    c: Char,
+    byte: Option<u8>,
+    len: usize,
+}
+
+impl InputAt {
+    /// Returns true iff this position is at the beginning of the input.
+    pub fn is_start(&self) -> bool {
+        self.pos == 0
+    }
+
+    /// Returns true iff this position is past the end of the input.
+    pub fn is_end(&self) -> bool {
+        self.c.is_none() && self.byte.is_none()
+    }
+
+    /// Returns the character at this position.
+    ///
+    /// If this position is just before or after the input, then an absent
+    /// character is returned.
+    pub fn char(&self) -> Char {
+        self.c
+    }
+
+    /// Returns the byte at this position.
+    pub fn byte(&self) -> Option<u8> {
+        self.byte
+    }
+
+    /// Returns the UTF-8 width of the character at this position.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns whether the UTF-8 width of the character at this position
+    /// is zero.
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Returns the byte offset of this position.
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    /// Returns the byte offset of the next position in the input.
+    pub fn next_pos(&self) -> usize {
+        self.pos + self.len
+    }
+}
+
+/// An abstraction over input used in the matching engines.
+pub trait Input: fmt::Debug {
+    /// Return an encoding of the position at byte offset `i`.
+    fn at(&self, i: usize) -> InputAt;
+
+    /// Return the Unicode character occurring next to `at`.
+    ///
+    /// If no such character could be decoded, then `Char` is absent.
+    fn next_char(&self, at: InputAt) -> Char;
+
+    /// Return the Unicode character occurring previous to `at`.
+    ///
+    /// If no such character could be decoded, then `Char` is absent.
+    fn previous_char(&self, at: InputAt) -> Char;
+
+    /// Return true if the given empty width instruction matches at the
+    /// input position given.
+    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool;
+
+    /// Scan the input for a matching prefix.
+    fn prefix_at(
+        &self,
+        prefixes: &LiteralSearcher,
+        at: InputAt,
+    ) -> Option<InputAt>;
+
+    /// The number of bytes in the input.
+    fn len(&self) -> usize;
+
+    /// Whether the input is empty.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return the given input as a sequence of bytes.
+    fn as_bytes(&self) -> &[u8];
+}
+
+impl<'a, T: Input> Input for &'a T {
+    fn at(&self, i: usize) -> InputAt {
+        (**self).at(i)
+    }
+
+    fn next_char(&self, at: InputAt) -> Char {
+        (**self).next_char(at)
+    }
+
+    fn previous_char(&self, at: InputAt) -> Char {
+        (**self).previous_char(at)
+    }
+
+    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
+        (**self).is_empty_match(at, empty)
+    }
+
+    fn prefix_at(
+        &self,
+        prefixes: &LiteralSearcher,
+        at: InputAt,
+    ) -> Option<InputAt> {
+        (**self).prefix_at(prefixes, at)
+    }
+
+    fn len(&self) -> usize {
+        (**self).len()
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        (**self).as_bytes()
+    }
+}
+
+/// An input reader over characters.
+#[derive(Clone, Copy, Debug)]
+pub struct CharInput<'t>(&'t [u8]);
+
+impl<'t> CharInput<'t> {
+    /// Return a new character input reader for the given string.
+    pub fn new(s: &'t [u8]) -> CharInput<'t> {
+        CharInput(s)
+    }
+}
+
+impl<'t> ops::Deref for CharInput<'t> {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        self.0
+    }
+}
+
+impl<'t> Input for CharInput<'t> {
+    fn at(&self, i: usize) -> InputAt {
+        if i >= self.len() {
+            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
+        } else {
+            let c = decode_utf8(&self[i..]).map(|(c, _)| c).into();
+            InputAt { pos: i, c: c, byte: None, len: c.len_utf8() }
+        }
+    }
+
+    fn next_char(&self, at: InputAt) -> Char {
+        at.char()
+    }
+
+    fn previous_char(&self, at: InputAt) -> Char {
+        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
+    }
+
+    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
+        use prog::EmptyLook::*;
+        match empty.look {
+            StartLine => {
+                let c = self.previous_char(at);
+                at.pos() == 0 || c == '\n'
+            }
+            EndLine => {
+                let c = self.next_char(at);
+                at.pos() == self.len() || c == '\n'
+            }
+            StartText => at.pos() == 0,
+            EndText => at.pos() == self.len(),
+            WordBoundary => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_char() != c2.is_word_char()
+            }
+            NotWordBoundary => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_char() == c2.is_word_char()
+            }
+            WordBoundaryAscii => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_byte() != c2.is_word_byte()
+            }
+            NotWordBoundaryAscii => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_byte() == c2.is_word_byte()
+            }
+        }
+    }
+
+    fn prefix_at(
+        &self,
+        prefixes: &LiteralSearcher,
+        at: InputAt,
+    ) -> Option<InputAt> {
+        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
+    }
+
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        self.0
+    }
+}
+
+/// An input reader over bytes.
+#[derive(Clone, Copy, Debug)]
+pub struct ByteInput<'t> {
+    text: &'t [u8],
+    only_utf8: bool,
+}
+
+impl<'t> ByteInput<'t> {
+    /// Return a new byte-based input reader for the given string.
+    pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> {
+        ByteInput { text: text, only_utf8: only_utf8 }
+    }
+}
+
+impl<'t> ops::Deref for ByteInput<'t> {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        self.text
+    }
+}
+
+impl<'t> Input for ByteInput<'t> {
+    fn at(&self, i: usize) -> InputAt {
+        if i >= self.len() {
+            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
+        } else {
+            InputAt {
+                pos: i,
+                c: None.into(),
+                byte: self.get(i).cloned(),
+                len: 1,
+            }
+        }
+    }
+
+    fn next_char(&self, at: InputAt) -> Char {
+        decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into()
+    }
+
+    fn previous_char(&self, at: InputAt) -> Char {
+        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
+    }
+
+    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
+        use prog::EmptyLook::*;
+        match empty.look {
+            StartLine => {
+                let c = self.previous_char(at);
+                at.pos() == 0 || c == '\n'
+            }
+            EndLine => {
+                let c = self.next_char(at);
+                at.pos() == self.len() || c == '\n'
+            }
+            StartText => at.pos() == 0,
+            EndText => at.pos() == self.len(),
+            WordBoundary => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_char() != c2.is_word_char()
+            }
+            NotWordBoundary => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                c1.is_word_char() == c2.is_word_char()
+            }
+            WordBoundaryAscii => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                if self.only_utf8 {
+                    // If we must match UTF-8, then we can't match word
+                    // boundaries at invalid UTF-8.
+                    if c1.is_none() && !at.is_start() {
+                        return false;
+                    }
+                    if c2.is_none() && !at.is_end() {
+                        return false;
+                    }
+                }
+                c1.is_word_byte() != c2.is_word_byte()
+            }
+            NotWordBoundaryAscii => {
+                let (c1, c2) = (self.previous_char(at), self.next_char(at));
+                if self.only_utf8 {
+                    // If we must match UTF-8, then we can't match word
+                    // boundaries at invalid UTF-8.
+                    if c1.is_none() && !at.is_start() {
+                        return false;
+                    }
+                    if c2.is_none() && !at.is_end() {
+                        return false;
+                    }
+                }
+                c1.is_word_byte() == c2.is_word_byte()
+            }
+        }
+    }
+
+    fn prefix_at(
+        &self,
+        prefixes: &LiteralSearcher,
+        at: InputAt,
+    ) -> Option<InputAt> {
+        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
+    }
+
+    fn len(&self) -> usize {
+        self.text.len()
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        self.text
+    }
+}
+
+/// An inline representation of `Option<char>`.
+///
+/// This eliminates the need to do case analysis on `Option<char>` to determine
+/// its ordering relative to other characters.
+///
+/// (The `Option<char>` is not related to encoding. Instead, it is used in the
+/// matching engines to represent the beginning and ending boundaries of the
+/// search text.)
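+///
+/// An absent character is encoded as `u32::MAX`, which is never a valid
+/// Unicode scalar value.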
+#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Char(u32);
+
+impl fmt::Debug for Char {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match char::from_u32(self.0) {
+            None => write!(f, "Empty"),
+            Some(c) => write!(f, "{:?}", c),
+        }
+    }
+}
+
+impl Char {
+    /// Returns true iff the character is absent.
+    #[inline]
+    pub fn is_none(self) -> bool {
+        self.0 == u32::MAX
+    }
+
+    /// Returns the length of the character's UTF-8 encoding.
+    ///
+    /// If the character is absent, then `1` is returned.
+    #[inline]
+    pub fn len_utf8(self) -> usize {
+        char::from_u32(self.0).map_or(1, |c| c.len_utf8())
+    }
+
+    /// Returns true iff the character is a word character.
+    ///
+    /// If the character is absent, then false is returned.
+    pub fn is_word_char(self) -> bool {
+        // is_word_character can panic if the Unicode data for \w isn't
+        // available. However, our compiler ensures that if a Unicode word
+        // boundary is used, then the data must also be available. If it isn't,
+        // then the compiler returns an error.
+        char::from_u32(self.0).map_or(false, syntax::is_word_character)
+    }
+
+    /// Returns true iff the byte is a word byte.
+    ///
+    /// If the byte is absent, then false is returned.
+    pub fn is_word_byte(self) -> bool {
+        match char::from_u32(self.0) {
+            Some(c) if c <= '\u{7F}' => syntax::is_word_byte(c as u8),
+            None | Some(_) => false,
+        }
+    }
+}
+
+impl From<char> for Char {
+    fn from(c: char) -> Char {
+        Char(c as u32)
+    }
+}
+
+impl From<Option<char>> for Char {
+    fn from(c: Option<char>) -> Char {
+        c.map_or(Char(u32::MAX), |c| c.into())
+    }
+}
+
+impl PartialEq<char> for Char {
+    #[inline]
+    fn eq(&self, other: &char) -> bool {
+        self.0 == *other as u32
+    }
+}
+
+impl PartialEq<Char> for char {
+    #[inline]
+    fn eq(&self, other: &Char) -> bool {
+        *self as u32 == other.0
+    }
+}
+
+impl PartialOrd<char> for Char {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        self.0.partial_cmp(&(*other as u32))
+    }
+}
+
+impl PartialOrd<Char> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &Char) -> Option<Ordering> {
+        (*self as u32).partial_cmp(&other.0)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..2a74bf8
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,785 @@
+/*!
+This crate provides a library for parsing, compiling, and executing regular
+expressions. Its syntax is similar to Perl-style regular expressions, but lacks
+a few features like look around and backreferences. In exchange, all searches
+execute in linear time with respect to the size of the regular expression and
+search text.
+
+This crate's documentation provides some simple examples, describes
+[Unicode support](#unicode) and exhaustively lists the
+[supported syntax](#syntax).
+
+For more specific details on the API for regular expressions, please see the
+documentation for the [`Regex`](struct.Regex.html) type.
+
+# Usage
+
+This crate is [on crates.io](https://crates.io/crates/regex) and can be
+used by adding `regex` to your dependencies in your project's `Cargo.toml`.
+
+```toml
+[dependencies]
+regex = "1"
+```
+
+If you're using Rust 2015, then you'll also need to add it to your crate root:
+
+```rust
+extern crate regex;
+```
+
+# Example: find a date
+
+General use of regular expressions in this package involves compiling an
+expression and then using it to search, split or replace text. For example,
+to confirm that some text resembles a date:
+
+```rust
+use regex::Regex;
+let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
+assert!(re.is_match("2014-01-01"));
+```
+
+Notice the use of the `^` and `$` anchors. In this crate, every expression
+is executed with an implicit `.*?` at the beginning and end, which allows
+it to match anywhere in the text. Anchors can be used to ensure that the
+full text matches an expression.
+
+This example also demonstrates the utility of
+[raw strings](https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals)
+in Rust, which
+are just like regular strings except they are prefixed with an `r` and do
+not process any escape sequences. For example, `"\\d"` is the same
+expression as `r"\d"`.
+
+# Example: Avoid compiling the same regex in a loop
+
+It is an anti-pattern to compile the same regular expression in a loop
+since compilation is typically expensive. (It takes anywhere from a few
+microseconds to a few **milliseconds** depending on the size of the
+regex.) Not only is compilation itself expensive, but this also prevents
+optimizations that reuse allocations internally to the matching engines.
+
+In Rust, it can sometimes be a pain to pass regular expressions around if
+they're used from inside a helper function. Instead, we recommend using the
+[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that
+regular expressions are compiled exactly once.
+
+For example:
+
+```rust
+#[macro_use] extern crate lazy_static;
+extern crate regex;
+
+use regex::Regex;
+
+fn some_helper_function(text: &str) -> bool {
+    lazy_static! {
+        static ref RE: Regex = Regex::new("...").unwrap();
+    }
+    RE.is_match(text)
+}
+
+fn main() {}
+```
+
+Specifically, in this example, the regex will be compiled when it is used for
+the first time. On subsequent uses, it will reuse the previous compilation.
+
+# Example: iterating over capture groups
+
+This crate provides convenient iterators for matching an expression
+repeatedly against a search string to find successive non-overlapping
+matches. For example, to find all dates in a string and be able to access
+them by their component pieces:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
+let text = "2012-03-14, 2013-01-01 and 2014-07-05";
+for cap in re.captures_iter(text) {
+    println!("Month: {} Day: {} Year: {}", &cap[2], &cap[3], &cap[1]);
+}
+// Output:
+// Month: 03 Day: 14 Year: 2012
+// Month: 01 Day: 01 Year: 2013
+// Month: 07 Day: 05 Year: 2014
+# }
+```
+
+Notice that the year is in the capture group indexed at `1`. This is
+because the *entire match* is stored in the capture group at index `0`.
+
+# Example: replacement with named capture groups
+
+Building on the previous example, perhaps we'd like to rearrange the date
+formats. This can be done with text replacement. But to make the code
+clearer, we can *name* our capture groups and use those names as variables
+in our replacement text:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
+let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+let after = re.replace_all(before, "$m/$d/$y");
+assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+# }
+```
+
+The `replace` methods are actually polymorphic in the replacement, which
+provides more flexibility than is seen here. (See the documentation for
+`Regex::replace` for more details.)
+
+Note that if your regex gets complicated, you can use the `x` flag to
+enable insignificant whitespace mode, which also lets you write comments:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?x)
+  (?P<y>\d{4}) # the year
+  -
+  (?P<m>\d{2}) # the month
+  -
+  (?P<d>\d{2}) # the day
+").unwrap();
+let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+let after = re.replace_all(before, "$m/$d/$y");
+assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+# }
+```
+
+If you wish to match against whitespace in this mode, you can still use `\s`,
+`\n`, `\t`, etc. To match a single literal space, use its hex character code
+`\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
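+
+As a small illustrative sketch:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// In `x` mode, the spaces around `\x20` are ignored, but `\x20` itself
+// matches a single literal space.
+let re = Regex::new(r"(?x)foo \x20 bar").unwrap();
+assert!(re.is_match("foo bar"));
+# }
+```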
+
+# Example: match multiple regular expressions simultaneously
+
+This demonstrates how to use a `RegexSet` to match multiple (possibly
+overlapping) regular expressions in a single scan of the search text:
+
+```rust
+use regex::RegexSet;
+
+let set = RegexSet::new(&[
+    r"\w+",
+    r"\d+",
+    r"\pL+",
+    r"foo",
+    r"bar",
+    r"barfoo",
+    r"foobar",
+]).unwrap();
+
+// Iterate over and collect all of the matches.
+let matches: Vec<_> = set.matches("foobar").into_iter().collect();
+assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+
+// You can also test whether a particular regex matched:
+let matches = set.matches("foobar");
+assert!(!matches.matched(5));
+assert!(matches.matched(6));
+```
+
+# Pay for what you use
+
+With respect to searching text with a regular expression, there are three
+questions that can be asked:
+
+1. Does the text match this expression?
+2. If so, where does it match?
+3. Where did the capturing groups match?
+
+Generally speaking, this crate could provide a function to answer only #3,
+which would subsume #1 and #2 automatically. However, it can be significantly
+more expensive to compute the location of capturing group matches, so it's best
+not to do it if you don't need to.
+
+Therefore, only use what you need. For example, don't use `find` if you
+only need to test if an expression matches a string. (Use `is_match`
+instead.)
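+
+As a small illustrative sketch of this guideline:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"\d+").unwrap();
+// Question #1: a simple yes/no, which lets the engine stop early.
+assert!(re.is_match("phone: 111-222-3333"));
+// Question #2: the location of the leftmost match costs a little more.
+let m = re.find("phone: 111-222-3333").unwrap();
+assert_eq!((m.start(), m.end()), (7, 10));
+# }
+```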
+
+# Unicode
+
+This implementation executes regular expressions **only** on valid UTF-8
+while exposing match locations as byte indices into the search string. (To
+relax this restriction, use the [`bytes`](bytes/index.html) sub-module.)
+
+Only simple case folding is supported. Namely, when matching
+case-insensitively, the characters are first mapped using the "simple" case
+folding rules defined by Unicode.
+
+Regular expressions themselves are **only** interpreted as a sequence of
+Unicode scalar values. This means you can use Unicode characters directly
+in your expression:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?i)Δ+").unwrap();
+let mat = re.find("ΔδΔ").unwrap();
+assert_eq!((mat.start(), mat.end()), (0, 6));
+# }
+```
+
+Most features of the regular expressions in this crate are Unicode aware. Here
+are some examples:
+
+* `.` will match any valid UTF-8 encoded Unicode scalar value except for `\n`.
+  (To also match `\n`, enable the `s` flag, e.g., `(?s:.)`.)
+* `\w`, `\d` and `\s` are Unicode aware. For example, `\s` will match all forms
+  of whitespace categorized by Unicode.
+* `\b` matches a Unicode word boundary.
+* Negated character classes like `[^a]` match all Unicode scalar values except
+  for `a`.
+* `^` and `$` are **not** Unicode aware in multi-line mode. Namely, they only
+  recognize `\n` and not any of the other forms of line terminators defined
+  by Unicode.
+
+Unicode general categories, scripts, script extensions, ages and a smattering
+of boolean properties are available as character classes. For example, you can
+match a sequence of numerals, Greek or Cherokee letters:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
+let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap();
+assert_eq!((mat.start(), mat.end()), (3, 23));
+# }
+```
+
+For a more detailed breakdown of Unicode support with respect to
+[UTS#18](http://unicode.org/reports/tr18/),
+please see the
+[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md)
+document in the root of the regex repository.
+
+# Opt out of Unicode support
+
+The `bytes` sub-module provides a `Regex` type that can be used to match
+on `&[u8]`. By default, text is interpreted as UTF-8 just like it is with
+the main `Regex` type. However, this behavior can be disabled by turning
+off the `u` flag, even if doing so could result in matching invalid UTF-8.
+For example, when the `u` flag is disabled, `.` will match any byte instead
+of any Unicode scalar value.
+
+Disabling the `u` flag is also possible with the standard `&str`-based `Regex`
+type, but it is only allowed where the UTF-8 invariant is maintained. For
+example, `(?-u:\w)` is an ASCII-only `\w` character class and is legal in an
+`&str`-based `Regex`, but `(?-u:\xFF)` will attempt to match the raw byte
+`\xFF`, which is invalid UTF-8 and therefore is illegal in `&str`-based
+regexes.
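+
+As a brief sketch of the difference (both patterns below are accepted;
+`(?-u:\w)` stays within valid UTF-8, while matching the raw byte `\xFF`
+requires the `bytes` API):
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// ASCII-only \w never matches the non-ASCII 'é'.
+let re = Regex::new(r"(?-u:\w)+").unwrap();
+assert_eq!(re.find("héllo").unwrap().as_str(), "h");
+// Matching the raw byte \xFF requires a bytes::Regex.
+let re = regex::bytes::Regex::new(r"(?-u)\xFF").unwrap();
+assert!(re.is_match(b"\xFF"));
+# }
+```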
+
+Finally, since Unicode support requires bundling large Unicode data
+tables, this crate exposes knobs to disable the compilation of those
+data tables, which can be useful for shrinking binary size and reducing
+compilation times. For details on how to do that, see the section on [crate
+features](#crate-features).
+
+# Syntax
+
+The syntax supported in this crate is documented below.
+
+Note that the regular expression parser and abstract syntax are exposed in
+a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax).
+
+## Matching one character
+
+<pre class="rust">
+.             any character except new line (includes new line with s flag)
+\d            digit (\p{Nd})
+\D            not digit
+\pN           One-letter name Unicode character class
+\p{Greek}     Unicode character class (general category or script)
+\PN           Negated one-letter name Unicode character class
+\P{Greek}     negated Unicode character class (general category or script)
+</pre>
+
+### Character classes
+
+<pre class="rust">
+[xyz]         A character class matching either x, y or z (union).
+[^xyz]        A character class matching any character except x, y and z.
+[a-z]         A character class matching any character in range a-z.
+[[:alpha:]]   ASCII character class ([A-Za-z])
+[[:^alpha:]]  Negated ASCII character class ([^A-Za-z])
+[x[^xyz]]     Nested/grouping character class (matching any character except y and z)
+[a-y&&xyz]    Intersection (matching x or y)
+[0-9&&[^4]]   Subtraction using intersection and negation (matching 0-9 except 4)
+[0-9--4]      Direct subtraction (matching 0-9 except 4)
+[a-g~~b-h]    Symmetric difference (matching `a` and `h` only)
+[\[\]]        Escaping in character classes (matching [ or ])
+</pre>
+
+Any named character class may appear inside a bracketed `[...]` character
+class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII
+digit. `[\p{Greek}&&\pL]` matches Greek letters.
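+
+For example, the set operations above can be combined with named classes
+(a small demonstration of subtraction and intersection):
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// Digits except `4`, written with direct subtraction.
+let re = Regex::new(r"[0-9--4]+").unwrap();
+assert_eq!(re.find("123456").unwrap().as_str(), "123");
+// Greek codepoints that are also letters.
+let re = Regex::new(r"[\p{Greek}&&\pL]").unwrap();
+assert!(re.is_match("λ"));
+# }
+```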
+
+Precedence in character classes, from most binding to least:
+
+1. Ranges: `a-cd` == `[a-c]d`
+2. Union: `ab&&bc` == `[ab]&&[bc]`
+3. Intersection: `^a-z&&b` == `^[a-z&&b]`
+4. Negation
+
+## Composites
+
+<pre class="rust">
+xy    concatenation (x followed by y)
+x|y   alternation (x or y, prefer x)
+</pre>
+
+## Repetitions
+
+<pre class="rust">
+x*        zero or more of x (greedy)
+x+        one or more of x (greedy)
+x?        zero or one of x (greedy)
+x*?       zero or more of x (ungreedy/lazy)
+x+?       one or more of x (ungreedy/lazy)
+x??       zero or one of x (ungreedy/lazy)
+x{n,m}    at least n x and at most m x (greedy)
+x{n,}     at least n x (greedy)
+x{n}      exactly n x
+x{n,m}?   at least n x and at most m x (ungreedy/lazy)
+x{n,}?    at least n x (ungreedy/lazy)
+x{n}?     exactly n x
+</pre>
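+
+For example, greedy and lazy variants differ only in how much they consume
+(a minimal illustration):
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// Greedy `+` takes as much as possible.
+let re = Regex::new(r"a+").unwrap();
+assert_eq!(re.find("aaa").unwrap().as_str(), "aaa");
+// Lazy `+?` takes as little as possible.
+let re = Regex::new(r"a+?").unwrap();
+assert_eq!(re.find("aaa").unwrap().as_str(), "a");
+# }
+```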
+
+## Empty matches
+
+<pre class="rust">
+^     the beginning of text (or start-of-line with multi-line mode)
+$     the end of text (or end-of-line with multi-line mode)
+\A    only the beginning of text (even with multi-line mode enabled)
+\z    only the end of text (even with multi-line mode enabled)
+\b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
+\B    not a Unicode word boundary
+</pre>
+
+## Grouping and flags
+
+<pre class="rust">
+(exp)          numbered capture group (indexed by opening parenthesis)
+(?P&lt;name&gt;exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+(?:exp)        non-capturing group
+(?flags)       set flags within current group
+(?flags:exp)   set flags for exp (non-capturing)
+</pre>
+
+Flags are each a single character. For example, `(?x)` sets the flag `x`
+and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
+the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
+the `x` flag and clears the `y` flag.
+
+All flags are by default disabled unless stated otherwise. They are:
+
+<pre class="rust">
+i     case-insensitive: letters match both upper and lower case
+m     multi-line mode: ^ and $ match begin/end of line
+s     allow . to match \n
+U     swap the meaning of x* and x*?
+u     Unicode support (enabled by default)
+x     ignore whitespace and allow line comments (starting with `#`)
+</pre>
+
+Flags can be toggled within a pattern. Here's an example that matches
+case-insensitively for the first part but case-sensitively for the second part:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
+let cap = re.captures("AaAaAbbBBBb").unwrap();
+assert_eq!(&cap[0], "AaAaAbb");
+# }
+```
+
+Notice that the `a+` matches either `a` or `A`, but the `b+` only matches
+`b`.
+
+Multi-line mode means `^` and `$` no longer match just at the beginning/end of
+the input, but at the beginning/end of lines:
+
+```
+# use regex::Regex;
+let re = Regex::new(r"(?m)^line \d+").unwrap();
+let m = re.find("line one\nline 2\n").unwrap();
+assert_eq!(m.as_str(), "line 2");
+```
+
+Note that `^` matches after new lines, even at the end of input:
+
+```
+# use regex::Regex;
+let re = Regex::new(r"(?m)^").unwrap();
+let m = re.find_iter("test\n").last().unwrap();
+assert_eq!((m.start(), m.end()), (5, 5));
+```
+
+Here is an example that uses an ASCII word boundary instead of a Unicode
+word boundary:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
+let cap = re.captures("$$abc$$").unwrap();
+assert_eq!(&cap[0], "abc");
+# }
+```
+
+## Escape sequences
+
+<pre class="rust">
+\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
+\a          bell (\x07)
+\f          form feed (\x0C)
+\t          horizontal tab
+\n          new line
+\r          carriage return
+\v          vertical tab (\x0B)
+\123        octal character code (up to three digits) (when enabled)
+\x7F        hex character code (exactly two digits)
+\x{10FFFF}  any hex character code corresponding to a Unicode code point
+\u007F      hex character code (exactly four digits)
+\u{7F}      any hex character code corresponding to a Unicode code point
+\U0000007F  hex character code (exactly eight digits)
+\U{7F}      any hex character code corresponding to a Unicode code point
+</pre>
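+
+For example, `\u{...}` and `\.` behave as described above (a short check):
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// U+0394 is the Greek capital letter delta.
+let re = Regex::new(r"\u{394}").unwrap();
+assert!(re.is_match("Δ"));
+// `\.` matches a literal dot rather than any character.
+let re = Regex::new(r"\.").unwrap();
+assert!(re.is_match("end."));
+# }
+```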
+
+## Perl character classes (Unicode friendly)
+
+These classes are based on the definitions provided in
+[UTS#18](http://www.unicode.org/reports/tr18/#Compatibility_Properties):
+
+<pre class="rust">
+\d     digit (\p{Nd})
+\D     not digit
+\s     whitespace (\p{White_Space})
+\S     not whitespace
+\w     word character (\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control})
+\W     not word character
+</pre>
+
+## ASCII character classes
+
+<pre class="rust">
+[[:alnum:]]    alphanumeric ([0-9A-Za-z])
+[[:alpha:]]    alphabetic ([A-Za-z])
+[[:ascii:]]    ASCII ([\x00-\x7F])
+[[:blank:]]    blank ([\t ])
+[[:cntrl:]]    control ([\x00-\x1F\x7F])
+[[:digit:]]    digits ([0-9])
+[[:graph:]]    graphical ([!-~])
+[[:lower:]]    lower case ([a-z])
+[[:print:]]    printable ([ -~])
+[[:punct:]]    punctuation ([!-/:-@\[-`{-~])
+[[:space:]]    whitespace ([\t\n\v\f\r ])
+[[:upper:]]    upper case ([A-Z])
+[[:word:]]     word characters ([0-9A-Za-z_])
+[[:xdigit:]]   hex digit ([0-9A-Fa-f])
+</pre>
+
+# Crate features
+
+By default, this crate tries pretty hard to make regex matching both as fast
+as possible and as correct as it can be, within reason. This means that there
+is a lot of code dedicated to performance, the handling of Unicode data and the
+Unicode data itself. Overall, this leads to more dependencies, larger binaries
+and longer compile times.  This trade off may not be appropriate in all cases,
+and indeed, even when all Unicode and performance features are disabled, one
+is still left with a perfectly serviceable regex engine that will work well
+in many cases.
+
+This crate exposes a number of features for controlling that trade off. Some
+of these features are strictly performance oriented, such that disabling them
+won't result in a loss of functionality, but may result in worse performance.
+Other features, such as the ones controlling the presence or absence of Unicode
+data, can result in a loss of functionality. For example, if one disables the
+`unicode-case` feature (described below), then compiling the regex `(?i)a`
+will fail since Unicode case insensitivity is enabled by default. Instead,
+callers must use `(?i-u)a` to disable Unicode case folding. Stated
+differently, enabling or disabling any of the features below can only add or
+subtract from the total set of valid regular expressions. Enabling or disabling
+a feature will never modify the match semantics of a regular expression.
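+
+For example, the ASCII-only form of case insensitivity mentioned above is
+plain regex syntax and compiles whether or not the `unicode-case` data is
+present (shown here with the default features, as a sketch of the syntax):
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+let re = Regex::new(r"(?i-u)a").unwrap();
+assert!(re.is_match("A"));
+# }
+```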
+
+All features below are enabled by default.
+
+### Ecosystem features
+
+* **std** -
+  When enabled, this will cause `regex` to use the standard library. Currently,
+  disabling this feature will always result in a compilation error. It is
+  intended to add `alloc`-only support to regex in the future.
+
+### Performance features
+
+* **perf** -
+  Enables all performance related features. This feature is enabled by default
+  and will always cover all features that improve performance, even if more
+  are added in the future.
+* **perf-cache** -
+  Enables the use of very fast thread safe caching for internal match state.
+  When this is disabled, caching is still used, but with a slower and simpler
+  implementation. Disabling this drops the `thread_local` and `lazy_static`
+  dependencies.
+* **perf-dfa** -
+  Enables the use of a lazy DFA for matching. The lazy DFA is used to compile
+  portions of a regex to a very fast DFA on an as-needed basis. This can
+  result in substantial speedups, usually by an order of magnitude on large
+  haystacks. The lazy DFA does not bring in any new dependencies, but it can
+  make compile times longer.
+* **perf-inline** -
+  Enables the use of aggressive inlining inside match routines. This reduces
+  the overhead of each match. The aggressive inlining, however, increases
+  compile times and binary size.
+* **perf-literal** -
+  Enables the use of literal optimizations for speeding up matches. In some
+  cases, literal optimizations can result in speedups of _several_ orders of
+  magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies.
+
+### Unicode features
+
+* **unicode** -
+  Enables all Unicode features. This feature is enabled by default, and will
+  always cover all Unicode features, even if more are added in the future.
+* **unicode-age** -
+  Provide the data for the
+  [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age).
+  This makes it possible to use classes like `\p{Age:6.0}` to refer to all
+  codepoints first introduced in Unicode 6.0.
+* **unicode-bool** -
+  Provide the data for numerous Unicode boolean properties. The full list
+  is not included here, but contains properties like `Alphabetic`, `Emoji`,
+  `Lowercase`, `Math`, `Uppercase` and `White_Space`.
+* **unicode-case** -
+  Provide the data for case insensitive matching using
+  [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
+* **unicode-gencat** -
+  Provide the data for
+  [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+  This includes, but is not limited to, `Decimal_Number`, `Letter`,
+  `Math_Symbol`, `Number` and `Punctuation`.
+* **unicode-perl** -
+  Provide the data for supporting the Unicode-aware Perl character classes,
+  corresponding to `\w`, `\s` and `\d`. This is also necessary for using
+  Unicode-aware word boundary assertions. Note that if this feature is
+  disabled, the `\s` and `\d` character classes are still available if the
+  `unicode-bool` and `unicode-gencat` features are enabled, respectively.
+* **unicode-script** -
+  Provide the data for
+  [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/).
+  This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`,
+  `Latin` and `Thai`.
+* **unicode-segment** -
+  Provide the data necessary to provide the properties used to implement the
+  [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
+  This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
+  `\p{sb=ATerm}`.
+
+
+# Untrusted input
+
+This crate can handle both untrusted regular expressions and untrusted
+search text.
+
+Untrusted regular expressions are handled by capping the size of a compiled
+regular expression.
+(See [`RegexBuilder::size_limit`](struct.RegexBuilder.html#method.size_limit).)
+Without this, it would be trivial for an attacker to exhaust your system's
+memory with expressions like `a{100}{100}{100}`.
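+
+For example, a compiled-size limit can be set explicitly through
+`RegexBuilder::size_limit` (the 1MB budget below is an arbitrary,
+illustrative value):
+
+```rust
+# extern crate regex; use regex::RegexBuilder;
+# fn main() {
+let result = RegexBuilder::new("a{100}{100}{100}")
+    .size_limit(1 << 20) // cap the compiled program at ~1MB
+    .build();
+assert!(result.is_err());
+# }
+```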
+
+Untrusted search text is allowed because the matching engine(s) in this
+crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search
+text`), which means there's no way to cause exponential blow-up like with
+some other regular expression engines. (We pay for this by disallowing
+features like arbitrary look-ahead and backreferences.)
+
+When a DFA is used, pathological cases with exponential state blow-up are
+avoided by constructing the DFA lazily or in an "online" manner. Therefore,
+at most one new state can be created for each byte of input. This satisfies
+our time complexity guarantees, but can lead to memory growth
+proportional to the size of the input. As a stopgap, the DFA is only
+allowed to store a fixed number of states. When the limit is reached, its
+states are wiped and matching continues, possibly duplicating previous work.
+If the limit is reached too frequently, the DFA gives up and hands control
+off to another matching engine with fixed memory requirements.
+(The DFA size limit can also be tweaked. See
+[`RegexBuilder::dfa_size_limit`](struct.RegexBuilder.html#method.dfa_size_limit).)
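+
+For example, the DFA cache budget can be set per regex through
+`RegexBuilder::dfa_size_limit` (the 10MB value below is an arbitrary,
+illustrative choice):
+
+```rust
+# extern crate regex; use regex::RegexBuilder;
+# fn main() {
+let re = RegexBuilder::new(r"\w{2,10}")
+    .dfa_size_limit(10 * (1 << 20)) // ~10MB of lazy DFA cache
+    .build()
+    .unwrap();
+assert!(re.is_match("hello"));
+# }
+```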
+*/
+
+#![deny(missing_docs)]
+#![cfg_attr(test, deny(warnings))]
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+#[cfg(not(feature = "std"))]
+compile_error!("`std` feature is currently required to build this crate");
+
+#[cfg(feature = "perf-literal")]
+extern crate aho_corasick;
+#[cfg(test)]
+extern crate doc_comment;
+#[cfg(feature = "perf-literal")]
+extern crate memchr;
+#[cfg(test)]
+#[cfg_attr(feature = "perf-literal", macro_use)]
+extern crate quickcheck;
+extern crate regex_syntax as syntax;
+#[cfg(feature = "perf-cache")]
+extern crate thread_local;
+
+#[cfg(test)]
+doc_comment::doctest!("../README.md");
+
+#[cfg(feature = "std")]
+pub use error::Error;
+#[cfg(feature = "std")]
+pub use re_builder::set_unicode::*;
+#[cfg(feature = "std")]
+pub use re_builder::unicode::*;
+#[cfg(feature = "std")]
+pub use re_set::unicode::*;
+#[cfg(feature = "std")]
+pub use re_unicode::{
+    escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
+    Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
+    SplitN, SubCaptureMatches,
+};
+
+/**
+Match regular expressions on arbitrary bytes.
+
+This module provides a nearly identical API to the one found in the
+top-level of this crate. There are two important differences:
+
+1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
+is used where `String` would have been used.
+2. Unicode support can be disabled even when disabling it would result in
+matching invalid UTF-8 bytes.
+
+# Example: match null terminated string
+
+This shows how to find all null-terminated strings in a slice of bytes:
+
+```rust
+# use regex::bytes::Regex;
+let re = Regex::new(r"(?-u)(?P<cstr>[^\x00]+)\x00").unwrap();
+let text = b"foo\x00bar\x00baz\x00";
+
+// Extract all of the strings without the null terminator from each match.
+// The unwrap is OK here since a match requires the `cstr` capture to match.
+let cstrs: Vec<&[u8]> =
+    re.captures_iter(text)
+      .map(|c| c.name("cstr").unwrap().as_bytes())
+      .collect();
+assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs);
+```
+
+# Example: selectively enable Unicode support
+
+This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded
+string (e.g., to extract a title from a Matroska file):
+
+```rust
+# use std::str;
+# use regex::bytes::Regex;
+let re = Regex::new(
+    r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))"
+).unwrap();
+let text = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65";
+let caps = re.captures(text).unwrap();
+
+// Notice that despite the `.*` at the end, it will only match valid UTF-8
+// because Unicode mode was enabled with the `u` flag. Without the `u` flag,
+// the `.*` would match the rest of the bytes.
+let mat = caps.get(1).unwrap();
+assert_eq!((7, 10), (mat.start(), mat.end()));
+
+// If there was a match, Unicode mode guarantees that `title` is valid UTF-8.
+let title = str::from_utf8(&caps[1]).unwrap();
+assert_eq!("☃", title);
+```
+
+In general, if the Unicode flag is enabled in a capture group and that capture
+is part of the overall match, then the capture is *guaranteed* to be valid
+UTF-8.
+
+# Syntax
+
+The supported syntax is pretty much the same as the syntax for Unicode
+regular expressions with a few changes that make sense for matching arbitrary
+bytes:
+
+1. The `u` flag can be disabled even when disabling it might cause the regex to
+match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
+"ASCII compatible" mode.
+2. In ASCII compatible mode, neither Unicode scalar values nor Unicode
+character classes are allowed.
+3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
+revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
+to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
+4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to
+determine whether a byte is a word byte or not.
+5. Hexadecimal notation can be used to specify arbitrary bytes instead of
+Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
+literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
+matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
+enabled.
+6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
+When the `s` flag is enabled, `.` matches any byte, as the example below shows.
+
+# Performance
+
+In general, one should expect performance on `&[u8]` to be roughly similar to
+performance on `&str`.
+*/
+#[cfg(feature = "std")]
+pub mod bytes {
+    pub use re_builder::bytes::*;
+    pub use re_builder::set_bytes::*;
+    pub use re_bytes::*;
+    pub use re_set::bytes::*;
+}
+
+mod backtrack;
+mod cache;
+mod compile;
+#[cfg(feature = "perf-dfa")]
+mod dfa;
+mod error;
+mod exec;
+mod expand;
+mod find_byte;
+#[cfg(feature = "perf-literal")]
+mod freqs;
+mod input;
+mod literal;
+#[cfg(feature = "pattern")]
+mod pattern;
+mod pikevm;
+mod prog;
+mod re_builder;
+mod re_bytes;
+mod re_set;
+mod re_trait;
+mod re_unicode;
+mod sparse;
+mod utf8;
+
+/// The `internal` module exists to support suspicious activity, such as
+/// testing different matching engines and supporting the `regex-debug` CLI
+/// utility.
+#[doc(hidden)]
+#[cfg(feature = "std")]
+pub mod internal {
+    pub use compile::Compiler;
+    pub use exec::{Exec, ExecBuilder};
+    pub use input::{Char, CharInput, Input, InputAt};
+    pub use literal::LiteralSearcher;
+    pub use prog::{EmptyLook, Inst, InstRanges, Program};
+}
diff --git a/src/literal/imp.rs b/src/literal/imp.rs
new file mode 100644
index 0000000..fe07ffc
--- /dev/null
+++ b/src/literal/imp.rs
@@ -0,0 +1,1121 @@
+use std::cmp;
+use std::mem;
+
+use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
+use memchr::{memchr, memchr2, memchr3};
+use syntax::hir::literal::{Literal, Literals};
+
+use freqs::BYTE_FREQUENCIES;
+
+/// A prefix extracted from a compiled regular expression.
+///
+/// A regex prefix is a set of literal strings that *must* be matched at the
+/// beginning of a regex in order for the entire regex to match. Similarly
+/// for a regex suffix.
+#[derive(Clone, Debug)]
+pub struct LiteralSearcher {
+    complete: bool,
+    lcp: FreqyPacked,
+    lcs: FreqyPacked,
+    matcher: Matcher,
+}
+
+#[derive(Clone, Debug)]
+enum Matcher {
+    /// No literals. (Never advances through the input.)
+    Empty,
+    /// A set of four or more single byte literals.
+    Bytes(SingleByteSet),
+    /// A single substring, find using memchr and frequency analysis.
+    FreqyPacked(FreqyPacked),
+    /// A single substring, find using Boyer-Moore.
+    BoyerMoore(BoyerMooreSearch),
+    /// An Aho-Corasick automaton.
+    AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
+    /// A packed multiple substring searcher, using SIMD.
+    ///
+    /// Note that Aho-Corasick will actually use this packed searcher
+    /// internally and automatically. However, there is some overhead associated
+    /// with going through the Aho-Corasick machinery. So using the packed
+    /// searcher directly results in some gains.
+    Packed { s: packed::Searcher, lits: Vec<Literal> },
+}
+
+impl LiteralSearcher {
+    /// Returns a matcher that never matches and never advances the input.
+    pub fn empty() -> Self {
+        Self::new(Literals::empty(), Matcher::Empty)
+    }
+
+    /// Returns a matcher for literal prefixes from the given set.
+    pub fn prefixes(lits: Literals) -> Self {
+        let matcher = Matcher::prefixes(&lits);
+        Self::new(lits, matcher)
+    }
+
+    /// Returns a matcher for literal suffixes from the given set.
+    pub fn suffixes(lits: Literals) -> Self {
+        let matcher = Matcher::suffixes(&lits);
+        Self::new(lits, matcher)
+    }
+
+    fn new(lits: Literals, matcher: Matcher) -> Self {
+        let complete = lits.all_complete();
+        LiteralSearcher {
+            complete: complete,
+            lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()),
+            lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()),
+            matcher: matcher,
+        }
+    }
+
+    /// Returns true if all matches comprise the entire regular expression.
+    ///
+    /// This does not necessarily mean that a literal match implies a match
+    /// of the regular expression. For example, the regular expression `^a`
+    /// is comprised of a single complete literal `a`, but the regular
+    /// expression demands that it only match at the beginning of a string.
+    pub fn complete(&self) -> bool {
+        self.complete && !self.is_empty()
+    }
+
+    /// Find the position of a literal in `haystack` if it exists.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => Some((0, 0)),
+            Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
+            FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+            BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+            AC { ref ac, .. } => {
+                ac.find(haystack).map(|m| (m.start(), m.end()))
+            }
+            Packed { ref s, .. } => {
+                s.find(haystack).map(|m| (m.start(), m.end()))
+            }
+        }
+    }
+
+    /// Like find, except matches must start at index `0`.
+    pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        for lit in self.iter() {
+            if lit.len() > haystack.len() {
+                continue;
+            }
+            if lit == &haystack[0..lit.len()] {
+                return Some((0, lit.len()));
+            }
+        }
+        None
+    }
+
+    /// Like find, except matches must end at index `haystack.len()`.
+    pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        for lit in self.iter() {
+            if lit.len() > haystack.len() {
+                continue;
+            }
+            if lit == &haystack[haystack.len() - lit.len()..] {
+                return Some((haystack.len() - lit.len(), haystack.len()));
+            }
+        }
+        None
+    }
+
+    /// Returns an iterator over all literals to be matched.
+    pub fn iter(&self) -> LiteralIter {
+        match self.matcher {
+            Matcher::Empty => LiteralIter::Empty,
+            Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
+            Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
+            Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
+            Matcher::AC { ref lits, .. } => LiteralIter::AC(lits),
+            Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits),
+        }
+    }
+
+    /// Returns a matcher for the longest common prefix of this matcher.
+    pub fn lcp(&self) -> &FreqyPacked {
+        &self.lcp
+    }
+
+    /// Returns a matcher for the longest common suffix of this matcher.
+    pub fn lcs(&self) -> &FreqyPacked {
+        &self.lcs
+    }
+
+    /// Returns true iff this prefix is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns the number of prefixes in this machine.
+    pub fn len(&self) -> usize {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => 0,
+            Bytes(ref sset) => sset.dense.len(),
+            FreqyPacked(_) => 1,
+            BoyerMoore(_) => 1,
+            AC { ref ac, .. } => ac.pattern_count(),
+            Packed { ref lits, .. } => lits.len(),
+        }
+    }
+
+    /// Return the approximate heap usage of literals in bytes.
+    pub fn approximate_size(&self) -> usize {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => 0,
+            Bytes(ref sset) => sset.approximate_size(),
+            FreqyPacked(ref single) => single.approximate_size(),
+            BoyerMoore(ref single) => single.approximate_size(),
+            AC { ref ac, .. } => ac.heap_bytes(),
+            Packed { ref s, .. } => s.heap_bytes(),
+        }
+    }
+}
+
+impl Matcher {
+    fn prefixes(lits: &Literals) -> Self {
+        let sset = SingleByteSet::prefixes(lits);
+        Matcher::new(lits, sset)
+    }
+
+    fn suffixes(lits: &Literals) -> Self {
+        let sset = SingleByteSet::suffixes(lits);
+        Matcher::new(lits, sset)
+    }
+
+    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
+        if lits.literals().is_empty() {
+            return Matcher::Empty;
+        }
+        if sset.dense.len() >= 26 {
+            // Avoid trying to match a large number of single bytes.
+            // This is *very* sensitive to a frequency analysis comparison
+            // between the bytes in sset and the composition of the haystack.
+            // No matter the size of sset, if its members all are rare in the
+            // haystack, then it'd be worth using it. How to tune this... IDK.
+            // ---AG
+            return Matcher::Empty;
+        }
+        if sset.complete {
+            return Matcher::Bytes(sset);
+        }
+        if lits.literals().len() == 1 {
+            let lit = lits.literals()[0].to_vec();
+            if BoyerMooreSearch::should_use(lit.as_slice()) {
+                return Matcher::BoyerMoore(BoyerMooreSearch::new(lit));
+            } else {
+                return Matcher::FreqyPacked(FreqyPacked::new(lit));
+            }
+        }
+
+        let pats = lits.literals().to_owned();
+        let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii;
+        if lits.literals().len() <= 100 && !is_aho_corasick_fast {
+            let mut builder = packed::Config::new()
+                .match_kind(packed::MatchKind::LeftmostFirst)
+                .builder();
+            if let Some(s) = builder.extend(&pats).build() {
+                return Matcher::Packed { s, lits: pats };
+            }
+        }
+        let ac = AhoCorasickBuilder::new()
+            .match_kind(aho_corasick::MatchKind::LeftmostFirst)
+            .dfa(true)
+            .build_with_size::<u32, _, _>(&pats)
+            .unwrap();
+        Matcher::AC { ac, lits: pats }
+    }
+}
+
+pub enum LiteralIter<'a> {
+    Empty,
+    Bytes(&'a [u8]),
+    Single(&'a [u8]),
+    AC(&'a [Literal]),
+    Packed(&'a [Literal]),
+}
+
+impl<'a> Iterator for LiteralIter<'a> {
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match *self {
+            LiteralIter::Empty => None,
+            LiteralIter::Bytes(ref mut many) => {
+                if many.is_empty() {
+                    None
+                } else {
+                    let next = &many[0..1];
+                    *many = &many[1..];
+                    Some(next)
+                }
+            }
+            LiteralIter::Single(ref mut one) => {
+                if one.is_empty() {
+                    None
+                } else {
+                    let next = &one[..];
+                    *one = &[];
+                    Some(next)
+                }
+            }
+            LiteralIter::AC(ref mut lits) => {
+                if lits.is_empty() {
+                    None
+                } else {
+                    let next = &lits[0];
+                    *lits = &lits[1..];
+                    Some(&**next)
+                }
+            }
+            LiteralIter::Packed(ref mut lits) => {
+                if lits.is_empty() {
+                    None
+                } else {
+                    let next = &lits[0];
+                    *lits = &lits[1..];
+                    Some(&**next)
+                }
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+struct SingleByteSet {
+    sparse: Vec<bool>,
+    dense: Vec<u8>,
+    complete: bool,
+    all_ascii: bool,
+}
+
+impl SingleByteSet {
+    fn new() -> SingleByteSet {
+        SingleByteSet {
+            sparse: vec![false; 256],
+            dense: vec![],
+            complete: true,
+            all_ascii: true,
+        }
+    }
+
+    fn prefixes(lits: &Literals) -> SingleByteSet {
+        let mut sset = SingleByteSet::new();
+        for lit in lits.literals() {
+            sset.complete = sset.complete && lit.len() == 1;
+            if let Some(&b) = lit.get(0) {
+                if !sset.sparse[b as usize] {
+                    if b > 0x7F {
+                        sset.all_ascii = false;
+                    }
+                    sset.dense.push(b);
+                    sset.sparse[b as usize] = true;
+                }
+            }
+        }
+        sset
+    }
+
+    fn suffixes(lits: &Literals) -> SingleByteSet {
+        let mut sset = SingleByteSet::new();
+        for lit in lits.literals() {
+            sset.complete = sset.complete && lit.len() == 1;
+            if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap_or(0)) {
+                if !sset.sparse[b as usize] {
+                    if b > 0x7F {
+                        sset.all_ascii = false;
+                    }
+                    sset.dense.push(b);
+                    sset.sparse[b as usize] = true;
+                }
+            }
+        }
+        sset
+    }
+
+    /// Faster find that special cases certain sizes to use memchr.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find(&self, text: &[u8]) -> Option<usize> {
+        match self.dense.len() {
+            0 => None,
+            1 => memchr(self.dense[0], text),
+            2 => memchr2(self.dense[0], self.dense[1], text),
+            3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text),
+            _ => self._find(text),
+        }
+    }
+
+    /// Generic find that works on any sized set.
+    fn _find(&self, haystack: &[u8]) -> Option<usize> {
+        for (i, &b) in haystack.iter().enumerate() {
+            if self.sparse[b as usize] {
+                return Some(i);
+            }
+        }
+        None
+    }
+
+    fn approximate_size(&self) -> usize {
+        (self.dense.len() * mem::size_of::<u8>())
+            + (self.sparse.len() * mem::size_of::<bool>())
+    }
+}
+
+/// Provides an implementation of fast substring search using frequency
+/// analysis.
+///
+/// memchr is so fast that we do everything we can to keep the loop in memchr
+/// for as long as possible. The easiest way to do this is to intelligently
+/// pick the byte to send to memchr. The best byte is the byte that occurs
+/// least frequently in the haystack. Since doing frequency analysis on the
+/// haystack is far too expensive, we compute a set of fixed frequencies up
+/// front and hard code them in src/freqs.rs. Frequency analysis is done via
+/// scripts/frequencies.py.
+#[derive(Clone, Debug)]
+pub struct FreqyPacked {
+    /// The pattern.
+    pat: Vec<u8>,
+    /// The number of Unicode characters in the pattern. This is useful for
+    /// determining the effective length of a pattern when deciding which
+    /// optimizations to perform. A trailing incomplete UTF-8 sequence counts
+    /// as one character.
+    char_len: usize,
+    /// The rarest byte in the pattern, according to pre-computed frequency
+    /// analysis.
+    rare1: u8,
+    /// The offset of the rarest byte in `pat`.
+    rare1i: usize,
+    /// The second rarest byte in the pattern, according to pre-computed
+    /// frequency analysis. (This may be equivalent to the rarest byte.)
+    ///
+    /// The second rarest byte is used as a type of guard for quickly detecting
+    /// a mismatch after memchr locates an instance of the rarest byte. This
+    /// is a hedge against pathological cases where the pre-computed frequency
+    /// analysis may be off. (But of course, does not prevent *all*
+    /// pathological cases.)
+    rare2: u8,
+    /// The offset of the second rarest byte in `pat`.
+    rare2i: usize,
+}
+
+impl FreqyPacked {
+    fn new(pat: Vec<u8>) -> FreqyPacked {
+        if pat.is_empty() {
+            return FreqyPacked::empty();
+        }
+
+        // Find the rarest two bytes. Try to make them distinct (but it's not
+        // required).
+        let mut rare1 = pat[0];
+        let mut rare2 = pat[0];
+        for b in pat[1..].iter().cloned() {
+            if freq_rank(b) < freq_rank(rare1) {
+                rare1 = b;
+            }
+        }
+        for &b in &pat {
+            if rare1 == rare2 {
+                rare2 = b
+            } else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
+                rare2 = b;
+            }
+        }
+
+        // And find the offsets of their last occurrences.
+        let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
+        let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();
+
+        let char_len = char_len_lossy(&pat);
+        FreqyPacked {
+            pat: pat,
+            char_len: char_len,
+            rare1: rare1,
+            rare1i: rare1i,
+            rare2: rare2,
+            rare2i: rare2i,
+        }
+    }
+
+    fn empty() -> FreqyPacked {
+        FreqyPacked {
+            pat: vec![],
+            char_len: 0,
+            rare1: 0,
+            rare1i: 0,
+            rare2: 0,
+            rare2i: 0,
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        let pat = &*self.pat;
+        if haystack.len() < pat.len() || pat.is_empty() {
+            return None;
+        }
+        let mut i = self.rare1i;
+        while i < haystack.len() {
+            i += match memchr(self.rare1, &haystack[i..]) {
+                None => return None,
+                Some(i) => i,
+            };
+            let start = i - self.rare1i;
+            let end = start + pat.len();
+            if end > haystack.len() {
+                return None;
+            }
+            let aligned = &haystack[start..end];
+            if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
+                return Some(start);
+            }
+            i += 1;
+        }
+        None
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn is_suffix(&self, text: &[u8]) -> bool {
+        if text.len() < self.len() {
+            return false;
+        }
+        text[text.len() - self.len()..] == *self.pat
+    }
+
+    pub fn len(&self) -> usize {
+        self.pat.len()
+    }
+
+    pub fn char_len(&self) -> usize {
+        self.char_len
+    }
+
+    fn approximate_size(&self) -> usize {
+        self.pat.len() * mem::size_of::<u8>()
+    }
+}
+
+fn char_len_lossy(bytes: &[u8]) -> usize {
+    String::from_utf8_lossy(bytes).chars().count()
+}
+
+/// An implementation of Tuned Boyer-Moore as laid out by
+/// Andrew Hume and Daniel Sunday in "Fast String Searching".
+/// O(n) in the size of the input.
+///
+/// Fast string searching algorithms come in many variations,
+/// but they can generally be described in terms of three main
+/// components.
+///
+/// The skip loop is where the string searcher wants to spend
+/// as much time as possible. Exactly which character in the
+/// pattern the skip loop examines varies from algorithm to
+/// algorithm, but in the simplest case this loop repeatedly
+/// looks at the last character in the pattern and jumps
+/// forward in the input if it is not in the pattern.
+/// Robert Boyer and J Moore called this the "fast" loop in
+/// their original paper.
+///
+/// The match loop is responsible for actually examining the
+/// whole potentially matching substring. In order to fail
+/// faster, the match loop sometimes has a guard test attached.
+/// The guard test uses frequency analysis of the different
+/// characters in the pattern to choose the least frequently
+/// occurring character and use it to find match failures
+/// as quickly as possible.
+///
+/// The shift rule governs how the algorithm will shuffle its
+/// test window in the event of a failure during the match loop.
+/// Certain shift rules allow the worst-case run time of the
+/// algorithm to be shown to be O(n) in the size of the input
+/// rather than O(nm) in the size of the input and the size
+/// of the pattern (as naive Boyer-Moore is).
+///
+/// "Fast String Searching", in addition to presenting a tuned
+/// algorithm, provides a comprehensive taxonomy of the many
+/// different flavors of string searchers. Under that taxonomy
+/// TBM, the algorithm implemented here, uses an unrolled fast
+/// skip loop with memchr fallback, a forward match loop with guard,
+/// and Sunday's mini generalized delta2 ("md2") shift rule. To unpack
+/// that you'll have to read the paper.
+#[derive(Clone, Debug)]
+pub struct BoyerMooreSearch {
+    /// The pattern we are going to look for in the haystack.
+    pattern: Vec<u8>,
+
+    /// The skip table for the skip loop.
+    ///
+    /// Maps the character at the end of the input
+    /// to a shift.
+    skip_table: Vec<usize>,
+
+    /// The guard character (least frequently occurring char).
+    guard: u8,
+    /// The reverse-index of the guard character in the pattern.
+    guard_reverse_idx: usize,
+
+    /// Daniel Sunday's mini generalized delta2 shift table.
+    ///
+    /// We use a skip loop, so we only have to provide a shift
+    /// for the skip char (last char). This is why it is a mini
+    /// shift rule.
+    md2_shift: usize,
+}
+
+impl BoyerMooreSearch {
+    /// Create a new string searcher, performing whatever
+    /// compilation steps are required.
+    fn new(pattern: Vec<u8>) -> Self {
+        debug_assert!(!pattern.is_empty());
+
+        let (g, gi) = Self::select_guard(pattern.as_slice());
+        let skip_table = Self::compile_skip_table(pattern.as_slice());
+        let md2_shift = Self::compile_md2_shift(pattern.as_slice());
+        BoyerMooreSearch {
+            pattern: pattern,
+            skip_table: skip_table,
+            guard: g,
+            guard_reverse_idx: gi,
+            md2_shift: md2_shift,
+        }
+    }
+
+    /// Find the pattern in `haystack`, returning the offset
+    /// of the start of the first occurrence of the pattern
+    /// in `haystack`.
+    #[inline]
+    fn find(&self, haystack: &[u8]) -> Option<usize> {
+        if haystack.len() < self.pattern.len() {
+            return None;
+        }
+
+        let mut window_end = self.pattern.len() - 1;
+
+        // Inspired by the grep source. It is a way
+        // to do correct loop unrolling without having to place
+        // a crashpad of terminating characters at the end in
+        // the way described in the Fast String Searching paper.
+        const NUM_UNROLL: usize = 10;
+        // 1 for the initial position, and 1 for the md2 shift
+        let short_circuit = (NUM_UNROLL + 2) * self.pattern.len();
+
+        if haystack.len() > short_circuit {
+            // just 1 for the md2 shift
+            let backstop =
+                haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len());
+            loop {
+                window_end =
+                    match self.skip_loop(haystack, window_end, backstop) {
+                        Some(i) => i,
+                        None => return None,
+                    };
+                if window_end >= backstop {
+                    break;
+                }
+
+                if self.check_match(haystack, window_end) {
+                    return Some(window_end - (self.pattern.len() - 1));
+                } else {
+                    let skip = self.skip_table[haystack[window_end] as usize];
+                    window_end +=
+                        if skip == 0 { self.md2_shift } else { skip };
+                    continue;
+                }
+            }
+        }
+
+        // now process the input after the backstop
+        while window_end < haystack.len() {
+            let mut skip = self.skip_table[haystack[window_end] as usize];
+            if skip == 0 {
+                if self.check_match(haystack, window_end) {
+                    return Some(window_end - (self.pattern.len() - 1));
+                } else {
+                    skip = self.md2_shift;
+                }
+            }
+            window_end += skip;
+        }
+
+        None
+    }
+
+    fn len(&self) -> usize {
+        return self.pattern.len();
+    }
+
+    /// The key heuristic that decides whether `BoyerMooreSearch` is used at all.
+    ///
+    /// See `rust-lang/regex/issues/408`.
+    ///
+    /// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled
+    /// platform-specific memchr routine with a bit of frequency
+    /// analysis sprinkled on top actually wins most of the time.
+    /// However, there are a few cases where Tuned Boyer-Moore still
+    /// wins.
+    ///
+    /// If the haystack is random, frequency analysis doesn't help us,
+    /// so Boyer-Moore will win for sufficiently large needles.
+    /// Unfortunately, there is no obvious way to determine this
+    /// ahead of time.
+    ///
+    /// If the pattern itself consists of very common characters,
+    /// frequency analysis won't get us anywhere. The most extreme
+    /// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately,
+    /// this case is wholly determined by the pattern, so we can actually
+    /// implement the heuristic.
+    ///
+    /// A third case is if the pattern is sufficiently long. The idea
+    /// here is that once the pattern gets long enough the Tuned
+    /// Boyer-Moore skip loop will start making strides long enough
+    /// to beat the asm deep magic that is memchr.
+    fn should_use(pattern: &[u8]) -> bool {
+        // TBM is only used for patterns strictly longer than this.
+        const MIN_LEN: usize = 9;
+        // The minimum frequency rank (lower is rarer) that every byte in the
+        // pattern must have in order to use TBM. That is, if the pattern
+        // contains _any_ byte with a lower rank, then TBM won't be used.
+        const MIN_CUTOFF: usize = 150;
+        // The maximum frequency rank for any byte.
+        const MAX_CUTOFF: usize = 255;
+        // The scaling factor used to determine the actual cutoff frequency
+        // to use (keeping in mind that the minimum frequency rank is bounded
+        // by MIN_CUTOFF). This scaling factor is an attempt to make TBM more
+        // likely to be used as the pattern grows longer. That is, longer
+        // patterns permit somewhat less frequent bytes than shorter patterns,
+        // under the assumption that TBM gets better as the pattern gets
+        // longer.
+        const LEN_CUTOFF_PROPORTION: usize = 4;
+
+        let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION);
+        let cutoff = cmp::max(
+            MIN_CUTOFF,
+            MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank),
+        );
+        // The pattern must be long enough to be worthwhile. For example,
+        // memchr will beat TBM on a pattern as short as `e`, even though
+        // `e` is quite common.
+        pattern.len() > MIN_LEN
+            // all the bytes must be more common than the cutoff.
+            && pattern.iter().all(|c| freq_rank(*c) >= cutoff)
+    }
+
+    /// Check to see if there is a match at the given position
+    #[inline]
+    fn check_match(&self, haystack: &[u8], window_end: usize) -> bool {
+        // guard test
+        if haystack[window_end - self.guard_reverse_idx] != self.guard {
+            return false;
+        }
+
+        // match loop
+        let window_start = window_end - (self.pattern.len() - 1);
+        for i in 0..self.pattern.len() {
+            if self.pattern[i] != haystack[window_start + i] {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    /// Skip forward according to the shift table.
+    ///
+    /// Returns the offset of the next occurrence
+    /// of the last char in the pattern, or the none
+    /// if it never reappears. If `skip_loop` hits the backstop
+    /// it will leave early.
+    #[inline]
+    fn skip_loop(
+        &self,
+        haystack: &[u8],
+        mut window_end: usize,
+        backstop: usize,
+    ) -> Option<usize> {
+        let window_end_snapshot = window_end;
+        let skip_of = |we: usize| -> usize {
+            // Unsafe might make this faster, but the benchmarks
+            // were hard to interpret.
+            self.skip_table[haystack[we] as usize]
+        };
+
+        loop {
+            let mut skip = skip_of(window_end);
+            window_end += skip;
+            skip = skip_of(window_end);
+            window_end += skip;
+            if skip != 0 {
+                skip = skip_of(window_end);
+                window_end += skip;
+                skip = skip_of(window_end);
+                window_end += skip;
+                skip = skip_of(window_end);
+                window_end += skip;
+                if skip != 0 {
+                    skip = skip_of(window_end);
+                    window_end += skip;
+                    skip = skip_of(window_end);
+                    window_end += skip;
+                    skip = skip_of(window_end);
+                    window_end += skip;
+                    if skip != 0 {
+                        skip = skip_of(window_end);
+                        window_end += skip;
+                        skip = skip_of(window_end);
+                        window_end += skip;
+
+                        // If ten iterations did not make at least 16 words
+                        // worth of progress, we just fall back on memchr.
+                        if window_end - window_end_snapshot
+                            > 16 * mem::size_of::<usize>()
+                        {
+                            // Returning a window_end >= backstop will
+                            // immediately break us out of the inner loop in
+                            // `find`.
+                            if window_end >= backstop {
+                                return Some(window_end);
+                            }
+
+                            continue; // we made enough progress
+                        } else {
+                            // In case we are already there, and so that
+                            // we will catch the guard char.
+                            window_end = window_end
+                                .checked_sub(1 + self.guard_reverse_idx)
+                                .unwrap_or(0);
+
+                            match memchr(self.guard, &haystack[window_end..]) {
+                                None => return None,
+                                Some(g_idx) => {
+                                    return Some(
+                                        window_end
+                                            + g_idx
+                                            + self.guard_reverse_idx,
+                                    );
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            return Some(window_end);
+        }
+    }
+
+    /// Compute the ufast skip table.
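+    ///
+    /// As a small worked example of the rule below: for the pattern `abcab`,
+    /// the table maps `a -> 1`, `b -> 0` and `c -> 2` (the distance from each
+    /// byte's rightmost occurrence to the end of the pattern), and every
+    /// other byte to `5`, the pattern length.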
+    fn compile_skip_table(pattern: &[u8]) -> Vec<usize> {
+        let mut tab = vec![pattern.len(); 256];
+
+        // For every char in the pattern, we write a skip
+        // that will line us up with the rightmost occurrence.
+        //
+        // N.B. the sentinel (0) is written by the last
+        // loop iteration.
+        for (i, c) in pattern.iter().enumerate() {
+            tab[*c as usize] = (pattern.len() - 1) - i;
+        }
+
+        tab
+    }
+
+    /// Select the guard character based off of the precomputed
+    /// frequency table.
+    fn select_guard(pattern: &[u8]) -> (u8, usize) {
+        let mut rarest = pattern[0];
+        let mut rarest_rev_idx = pattern.len() - 1;
+        for (i, c) in pattern.iter().enumerate() {
+            if freq_rank(*c) < freq_rank(rarest) {
+                rarest = *c;
+                rarest_rev_idx = (pattern.len() - 1) - i;
+            }
+        }
+
+        (rarest, rarest_rev_idx)
+    }
+
+    /// If there is another occurrence of the skip
+    /// char, shift to it, otherwise just shift to
+    /// the next window.
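+    ///
+    /// As a small worked example: for the pattern `abcab`, the skip char is
+    /// the final `b`, and its previous occurrence is 3 positions earlier, so
+    /// the shift is `3`.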
+    fn compile_md2_shift(pattern: &[u8]) -> usize {
+        let shiftc = *pattern.last().unwrap();
+
+        // For a pattern of length 1 we will never apply the
+        // shift rule, so we use a poison value on the principle
+        // that failing fast is a good thing.
+        if pattern.len() == 1 {
+            return 0xDEADBEAF;
+        }
+
+        let mut i = pattern.len() - 2;
+        while i > 0 {
+            if pattern[i] == shiftc {
+                return (pattern.len() - 1) - i;
+            }
+            i -= 1;
+        }
+
+        // The skip char never re-occurs in the pattern, so
+        // we can just shift the whole window length.
+        pattern.len() - 1
+    }
+
+    fn approximate_size(&self) -> usize {
+        (self.pattern.len() * mem::size_of::<u8>())
+            + (256 * mem::size_of::<usize>()) // skip table
+    }
+}
+
+fn freq_rank(b: u8) -> usize {
+    BYTE_FREQUENCIES[b as usize] as usize
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{BoyerMooreSearch, FreqyPacked};
+
+    //
+    // Unit Tests
+    //
+
+    // The "hello, world" of string searching
+    #[test]
+    fn bm_find_subs() {
+        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+        let haystack = b"I keep seeing patterns in this text";
+        assert_eq!(14, searcher.find(haystack).unwrap());
+    }
+
+    #[test]
+    fn bm_find_no_subs() {
+        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+        let haystack = b"I keep seeing needles in this text";
+        assert_eq!(None, searcher.find(haystack));
+    }
+
+    //
+    // Regression Tests
+    //
+
+    #[test]
+    fn bm_skip_reset_bug() {
+        let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0];
+        let needle = vec![0, 1, 1, 0];
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let offset = searcher.find(haystack.as_slice()).unwrap();
+        assert_eq!(4, offset);
+    }
+
+    #[test]
+    fn bm_backstop_underflow_bug() {
+        let haystack = vec![0, 0];
+        let needle = vec![0, 0];
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let offset = searcher.find(haystack.as_slice()).unwrap();
+        assert_eq!(0, offset);
+    }
+
+    #[test]
+    fn bm_naive_off_by_one_bug() {
+        let haystack = vec![91];
+        let needle = vec![91];
+
+        let naive_offset = naive_find(&needle, &haystack).unwrap();
+        assert_eq!(0, naive_offset);
+    }
+
+    #[test]
+    fn bm_memchr_fallback_indexing_bug() {
+        let mut haystack = vec![
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        ];
+        let needle = vec![1, 1, 1, 1, 32, 32, 87];
+        let needle_start = haystack.len();
+        haystack.extend(needle.clone());
+
+        let searcher = BoyerMooreSearch::new(needle);
+        assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap());
+    }
+
+    #[test]
+    fn bm_backstop_boundary() {
+        let haystack = b"\
+// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+e_data.clone_created(entity_id, entity_to_add.entity_id);
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+"
+        .to_vec();
+        let needle = b"clone_created".to_vec();
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let result = searcher.find(&haystack);
+        assert_eq!(Some(43), result);
+    }
+
+    #[test]
+    fn bm_win_gnu_indexing_bug() {
+        let haystack_raw = vec![
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        ];
+        let needle = vec![1, 1, 1, 1, 1, 1, 1];
+        let haystack = haystack_raw.as_slice();
+
+        BoyerMooreSearch::new(needle.clone()).find(haystack);
+    }
+
+    //
+    // QuickCheck Properties
+    //
+
+    use quickcheck::TestResult;
+
+    fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+        assert!(needle.len() <= haystack.len());
+
+        for i in 0..(haystack.len() - (needle.len() - 1)) {
+            if haystack[i] == needle[0]
+                && &haystack[i..(i + needle.len())] == needle
+            {
+                return Some(i);
+            }
+        }
+
+        None
+    }
+
+    quickcheck! {
+        fn qc_bm_equals_nieve_find(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+            if pile1.len() == 0 || pile2.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let (needle, haystack) = if pile1.len() < pile2.len() {
+                (pile1, pile2.as_slice())
+            } else {
+                (pile2, pile1.as_slice())
+            };
+
+            let searcher = BoyerMooreSearch::new(needle.clone());
+            TestResult::from_bool(
+                searcher.find(haystack) == naive_find(&needle, haystack))
+        }
+
+        fn qc_bm_equals_single(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+            if pile1.len() == 0 || pile2.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let (needle, haystack) = if pile1.len() < pile2.len() {
+                (pile1, pile2.as_slice())
+            } else {
+                (pile2, pile1.as_slice())
+            };
+
+            let bm_searcher = BoyerMooreSearch::new(needle.clone());
+            let freqy_memchr = FreqyPacked::new(needle);
+            TestResult::from_bool(
+                bm_searcher.find(haystack) == freqy_memchr.find(haystack))
+        }
+
+        fn qc_bm_finds_trailing_needle(
+            haystack_pre: Vec<u8>,
+            needle: Vec<u8>
+        ) -> TestResult {
+            if needle.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let mut haystack = haystack_pre.clone();
+            let searcher = BoyerMooreSearch::new(needle.clone());
+
+            if haystack.len() >= needle.len() &&
+                searcher.find(haystack.as_slice()).is_some() {
+                return TestResult::discard();
+            }
+
+            haystack.extend(needle.clone());
+
+            // What if the tail of the haystack can start the
+            // needle?
+            let start = haystack_pre.len()
+                .checked_sub(needle.len())
+                .unwrap_or(0);
+            for i in 0..(needle.len() - 1) {
+                if searcher.find(&haystack[(i + start)..]).is_some() {
+                    return TestResult::discard();
+                }
+            }
+
+            TestResult::from_bool(
+                searcher.find(haystack.as_slice())
+                        .map(|x| x == haystack_pre.len())
+                        .unwrap_or(false))
+        }
+
+        // The qc_bm_equals_* props above only test the negative case, as
+        // @burntsushi pointed out in
+        // https://github.com/rust-lang/regex/issues/446. This quickcheck
+        // prop is an effort to force testing of the positive case.
+        // qc_bm_finds_first and qc_bm_finds_trailing_needle already check
+        // some of the positive cases, but they don't cover cases where the
+        // needle is in the middle of the haystack. This prop fills that hole.
+        fn qc_bm_finds_subslice(
+            haystack: Vec<u8>,
+            needle_start: usize,
+            needle_length: usize
+        ) -> TestResult {
+            if haystack.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let needle_start = needle_start % haystack.len();
+            let needle_length = needle_length % (haystack.len() - needle_start);
+
+            if needle_length == 0 {
+                return TestResult::discard();
+            }
+
+            let needle = &haystack[needle_start..(needle_start + needle_length)];
+
+            let bm_searcher = BoyerMooreSearch::new(needle.to_vec());
+
+            let start = naive_find(&needle, &haystack);
+            match start {
+                None => TestResult::from_bool(false),
+                Some(nf_start) =>
+                    TestResult::from_bool(
+                        nf_start <= needle_start
+                            && bm_searcher.find(&haystack) == start
+                    )
+            }
+        }
+
+        fn qc_bm_finds_first(needle: Vec<u8>) -> TestResult {
+            if needle.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let mut haystack = needle.clone();
+            let searcher = BoyerMooreSearch::new(needle.clone());
+            haystack.extend(needle);
+
+            TestResult::from_bool(
+                searcher.find(haystack.as_slice())
+                        .map(|x| x == 0)
+                        .unwrap_or(false))
+        }
+    }
+}
diff --git a/src/literal/mod.rs b/src/literal/mod.rs
new file mode 100644
index 0000000..783c63b
--- /dev/null
+++ b/src/literal/mod.rs
@@ -0,0 +1,55 @@
+pub use self::imp::*;
+
+#[cfg(feature = "perf-literal")]
+mod imp;
+
+#[allow(missing_docs)]
+#[cfg(not(feature = "perf-literal"))]
+mod imp {
+    use syntax::hir::literal::Literals;
+
+    #[derive(Clone, Debug)]
+    pub struct LiteralSearcher(());
+
+    impl LiteralSearcher {
+        pub fn empty() -> Self {
+            LiteralSearcher(())
+        }
+
+        pub fn prefixes(_: Literals) -> Self {
+            LiteralSearcher(())
+        }
+
+        pub fn suffixes(_: Literals) -> Self {
+            LiteralSearcher(())
+        }
+
+        pub fn complete(&self) -> bool {
+            false
+        }
+
+        pub fn find(&self, _: &[u8]) -> Option<(usize, usize)> {
+            unreachable!()
+        }
+
+        pub fn find_start(&self, _: &[u8]) -> Option<(usize, usize)> {
+            unreachable!()
+        }
+
+        pub fn find_end(&self, _: &[u8]) -> Option<(usize, usize)> {
+            unreachable!()
+        }
+
+        pub fn is_empty(&self) -> bool {
+            true
+        }
+
+        pub fn len(&self) -> usize {
+            0
+        }
+
+        pub fn approximate_size(&self) -> usize {
+            0
+        }
+    }
+}
diff --git a/src/pattern.rs b/src/pattern.rs
new file mode 100644
index 0000000..e942667
--- /dev/null
+++ b/src/pattern.rs
@@ -0,0 +1,62 @@
+use std::str::pattern::{Pattern, SearchStep, Searcher};
+
+use re_unicode::{Matches, Regex};
+
+pub struct RegexSearcher<'r, 't> {
+    haystack: &'t str,
+    it: Matches<'r, 't>,
+    last_step_end: usize,
+    next_match: Option<(usize, usize)>,
+}
+
+impl<'r, 't> Pattern<'t> for &'r Regex {
+    type Searcher = RegexSearcher<'r, 't>;
+
+    fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
+        RegexSearcher {
+            haystack: haystack,
+            it: self.find_iter(haystack),
+            last_step_end: 0,
+            next_match: None,
+        }
+    }
+}
+
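+// Note: the std `Searcher` contract requires that the `Match`/`Reject` steps
+// returned by `next` are non-overlapping, in order, and collectively cover
+// the haystack. To honor that, this implementation buffers at most one
+// pending match (`next_match`) so it can first emit a `Reject` for any gap
+// preceding it.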
+unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
+    #[inline]
+    fn haystack(&self) -> &'t str {
+        self.haystack
+    }
+
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        if let Some((s, e)) = self.next_match {
+            self.next_match = None;
+            self.last_step_end = e;
+            return SearchStep::Match(s, e);
+        }
+        match self.it.next() {
+            None => {
+                if self.last_step_end < self.haystack().len() {
+                    let last = self.last_step_end;
+                    self.last_step_end = self.haystack().len();
+                    SearchStep::Reject(last, self.haystack().len())
+                } else {
+                    SearchStep::Done
+                }
+            }
+            Some(m) => {
+                let (s, e) = (m.start(), m.end());
+                if s == self.last_step_end {
+                    self.last_step_end = e;
+                    SearchStep::Match(s, e)
+                } else {
+                    self.next_match = Some((s, e));
+                    let last = self.last_step_end;
+                    self.last_step_end = s;
+                    SearchStep::Reject(last, s)
+                }
+            }
+        }
+    }
+}
diff --git a/src/pikevm.rs b/src/pikevm.rs
new file mode 100644
index 0000000..c106c76
--- /dev/null
+++ b/src/pikevm.rs
@@ -0,0 +1,360 @@
+// This module implements the Pike VM. That is, it guarantees linear time
+// search of a regex on any text with memory use proportional to the size of
+// the regex.
+//
+// It is equal in power to the backtracking engine in this crate, except the
+// backtracking engine is typically faster on small regexes/texts at the
+// expense of a bigger memory footprint.
+//
+// It can do more than the DFA can (specifically, record capture locations
+// and execute Unicode word boundary assertions), but at a slower speed.
+// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
+// epsilon transitions. That is, the Pike VM engine can be in multiple states
+// at once, whereas the DFA is only ever in one state at a time.
+//
+// Therefore, the Pike VM is generally treated as the fallback when the other
+// matching engines either aren't feasible to run or are insufficient.
+
+use std::mem;
+
+use exec::ProgramCache;
+use input::{Input, InputAt};
+use prog::{InstPtr, Program};
+use re_trait::Slot;
+use sparse::SparseSet;
+
+/// An NFA simulation matching engine.
+#[derive(Debug)]
+pub struct Fsm<'r, I> {
+    /// The sequence of opcodes (among other things) that is actually executed.
+    ///
+    /// The program may be byte oriented or Unicode codepoint oriented.
+    prog: &'r Program,
+    /// An explicit stack used for following epsilon transitions. (This is
+    /// borrowed from the cache.)
+    stack: &'r mut Vec<FollowEpsilon>,
+    /// The input to search.
+    input: I,
+}
+
+/// A cached allocation that can be reused on each execution.
+#[derive(Clone, Debug)]
+pub struct Cache {
+    /// A pair of ordered sets for tracking NFA states.
+    clist: Threads,
+    nlist: Threads,
+    /// An explicit stack used for following epsilon transitions.
+    stack: Vec<FollowEpsilon>,
+}
+
+/// An ordered set of NFA states and their captures.
+#[derive(Clone, Debug)]
+struct Threads {
+    /// An ordered set of opcodes (each opcode is an NFA state).
+    set: SparseSet,
+    /// Captures for every NFA state.
+    ///
+    /// These are stored in row-major order, where the columns are the capture
+    /// slots and the rows are the states.
+    caps: Vec<Slot>,
+    /// The number of capture slots stored per thread. (Every capture has
+    /// two slots.)
+    slots_per_thread: usize,
+}
+
+/// A representation of an explicit stack frame when following epsilon
+/// transitions. This is used to avoid recursion.
+#[derive(Clone, Debug)]
+enum FollowEpsilon {
+    /// Follow transitions at the given instruction pointer.
+    IP(InstPtr),
+    /// Restore the capture slot with the given position in the input.
+    Capture { slot: usize, pos: Slot },
+}
+
+impl Cache {
+    /// Create a new allocation used by the NFA machine to record execution
+    /// and captures.
+    pub fn new(_prog: &Program) -> Self {
+        Cache { clist: Threads::new(), nlist: Threads::new(), stack: vec![] }
+    }
+}
+
+impl<'r, I: Input> Fsm<'r, I> {
+    /// Execute the NFA matching engine.
+    ///
+    /// If there's a match, `exec` returns `true` and populates the given
+    /// captures accordingly.
+    pub fn exec(
+        prog: &'r Program,
+        cache: &ProgramCache,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        input: I,
+        start: usize,
+        end: usize,
+    ) -> bool {
+        let mut cache = cache.borrow_mut();
+        let cache = &mut cache.pikevm;
+        cache.clist.resize(prog.len(), prog.captures.len());
+        cache.nlist.resize(prog.len(), prog.captures.len());
+        let at = input.at(start);
+        Fsm { prog: prog, stack: &mut cache.stack, input: input }.exec_(
+            &mut cache.clist,
+            &mut cache.nlist,
+            matches,
+            slots,
+            quit_after_match,
+            at,
+            end,
+        )
+    }
+
+    fn exec_(
+        &mut self,
+        mut clist: &mut Threads,
+        mut nlist: &mut Threads,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        mut at: InputAt,
+        end: usize,
+    ) -> bool {
+        let mut matched = false;
+        let mut all_matched = false;
+        clist.set.clear();
+        nlist.set.clear();
+        'LOOP: loop {
+            if clist.set.is_empty() {
+                // Three ways to bail out when our current set of threads is
+                // empty.
+                //
+                // 1. We have a match---so we're done exploring any possible
+                //    alternatives. Time to quit. (We can't do this if we're
+                //    looking for matches for multiple regexes, unless we know
+                //    they all matched.)
+                //
+                // 2. If the expression starts with a '^' we can terminate as
+                //    soon as the last thread dies.
+                if (matched && matches.len() <= 1)
+                    || all_matched
+                    || (!at.is_start() && self.prog.is_anchored_start)
+                {
+                    break;
+                }
+
+                // 3. If there's a literal prefix for the program, try to
+                //    jump ahead quickly. If it can't be found, then we can
+                //    bail out early.
+                if !self.prog.prefixes.is_empty() {
+                    at = match self.input.prefix_at(&self.prog.prefixes, at) {
+                        None => break,
+                        Some(at) => at,
+                    };
+                }
+            }
+
+            // This simulates a preceding '.*?' for every regex by adding
+            // a state starting at the current position in the input for the
+            // beginning of the program only if we don't already have a match.
+            if clist.set.is_empty()
+                || (!self.prog.is_anchored_start && !all_matched)
+            {
+                self.add(&mut clist, slots, 0, at);
+            }
+            // The previous call to "add" actually inspects the position just
+            // before the current character. For stepping through the machine,
+            // we want to look at the current character, so we advance the
+            // input.
+            let at_next = self.input.at(at.next_pos());
+            for i in 0..clist.set.len() {
+                let ip = clist.set[i];
+                if self.step(
+                    &mut nlist,
+                    matches,
+                    slots,
+                    clist.caps(ip),
+                    ip,
+                    at,
+                    at_next,
+                ) {
+                    matched = true;
+                    all_matched = all_matched || matches.iter().all(|&b| b);
+                    if quit_after_match {
+                        // If we only care if a match occurs (not its
+                        // position), then we can quit right now.
+                        break 'LOOP;
+                    }
+                    if self.prog.matches.len() == 1 {
+                        // We don't need to check the rest of the threads
+                        // in this set because we've matched something
+                        // ("leftmost-first"). However, we still need to check
+                        // threads in the next set to support things like
+                        // greedy matching.
+                        //
+                        // This is only true for normal regexes. For regex
+                        // sets, we need to press on to observe other matches.
+                        break;
+                    }
+                }
+            }
+            if at.pos() >= end {
+                break;
+            }
+            at = at_next;
+            mem::swap(clist, nlist);
+            nlist.set.clear();
+        }
+        matched
+    }
+
+    /// Step through the input, one token (byte or codepoint) at a time.
+    ///
+    /// nlist is the set of states that will be processed on the next token
+    /// in the input.
+    ///
+    /// slots is the set of capture slots passed by the caller of the NFA.
+    /// They are written to only when a match state is visited.
+    ///
+    /// thread_caps is the set of captures set for the current NFA state, ip.
+    ///
+    /// at and at_next are the current and next positions in the input. at or
+    /// at_next may be EOF.
+    fn step(
+        &mut self,
+        nlist: &mut Threads,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        thread_caps: &mut [Option<usize>],
+        ip: usize,
+        at: InputAt,
+        at_next: InputAt,
+    ) -> bool {
+        use prog::Inst::*;
+        match self.prog[ip] {
+            Match(match_slot) => {
+                if match_slot < matches.len() {
+                    matches[match_slot] = true;
+                }
+                for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) {
+                    *slot = *val;
+                }
+                true
+            }
+            Char(ref inst) => {
+                if inst.c == at.char() {
+                    self.add(nlist, thread_caps, inst.goto, at_next);
+                }
+                false
+            }
+            Ranges(ref inst) => {
+                if inst.matches(at.char()) {
+                    self.add(nlist, thread_caps, inst.goto, at_next);
+                }
+                false
+            }
+            Bytes(ref inst) => {
+                if let Some(b) = at.byte() {
+                    if inst.matches(b) {
+                        self.add(nlist, thread_caps, inst.goto, at_next);
+                    }
+                }
+                false
+            }
+            EmptyLook(_) | Save(_) | Split(_) => false,
+        }
+    }
+
+    /// Follows epsilon transitions and adds them for processing to nlist,
+    /// starting at and including ip.
+    fn add(
+        &mut self,
+        nlist: &mut Threads,
+        thread_caps: &mut [Option<usize>],
+        ip: usize,
+        at: InputAt,
+    ) {
+        self.stack.push(FollowEpsilon::IP(ip));
+        while let Some(frame) = self.stack.pop() {
+            match frame {
+                FollowEpsilon::IP(ip) => {
+                    self.add_step(nlist, thread_caps, ip, at);
+                }
+                FollowEpsilon::Capture { slot, pos } => {
+                    thread_caps[slot] = pos;
+                }
+            }
+        }
+    }
+
+    /// A helper function for add that avoids excessive pushing to the stack.
+    fn add_step(
+        &mut self,
+        nlist: &mut Threads,
+        thread_caps: &mut [Option<usize>],
+        mut ip: usize,
+        at: InputAt,
+    ) {
+        // Instead of pushing and popping to the stack, we mutate ip as we
+        // traverse the set of states. We only push to the stack when we
+        // absolutely need recursion (restoring captures or following a
+        // branch).
+        use prog::Inst::*;
+        loop {
+            // Don't visit states we've already added.
+            if nlist.set.contains(ip) {
+                return;
+            }
+            nlist.set.insert(ip);
+            match self.prog[ip] {
+                EmptyLook(ref inst) => {
+                    if self.input.is_empty_match(at, inst) {
+                        ip = inst.goto;
+                    }
+                }
+                Save(ref inst) => {
+                    if inst.slot < thread_caps.len() {
+                        self.stack.push(FollowEpsilon::Capture {
+                            slot: inst.slot,
+                            pos: thread_caps[inst.slot],
+                        });
+                        thread_caps[inst.slot] = Some(at.pos());
+                    }
+                    ip = inst.goto;
+                }
+                Split(ref inst) => {
+                    self.stack.push(FollowEpsilon::IP(inst.goto2));
+                    ip = inst.goto1;
+                }
+                Match(_) | Char(_) | Ranges(_) | Bytes(_) => {
+                    let t = &mut nlist.caps(ip);
+                    for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
+                        *slot = *val;
+                    }
+                    return;
+                }
+            }
+        }
+    }
+}
+
+impl Threads {
+    fn new() -> Self {
+        Threads { set: SparseSet::new(0), caps: vec![], slots_per_thread: 0 }
+    }
+
+    fn resize(&mut self, num_insts: usize, ncaps: usize) {
+        if num_insts == self.set.capacity() {
+            return;
+        }
+        self.slots_per_thread = ncaps * 2;
+        self.set = SparseSet::new(num_insts);
+        self.caps = vec![None; self.slots_per_thread * num_insts];
+    }
+
+    fn caps(&mut self, pc: usize) -> &mut [Option<usize>] {
+        let i = pc * self.slots_per_thread;
+        &mut self.caps[i..i + self.slots_per_thread]
+    }
+}
diff --git a/src/prog.rs b/src/prog.rs
new file mode 100644
index 0000000..74e5f2f
--- /dev/null
+++ b/src/prog.rs
@@ -0,0 +1,434 @@
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::fmt;
+use std::mem;
+use std::ops::Deref;
+use std::slice;
+use std::sync::Arc;
+
+use input::Char;
+use literal::LiteralSearcher;
+
+/// `InstPtr` represents the index of an instruction in a regex program.
+pub type InstPtr = usize;
+
+/// Program is a sequence of instructions and various facts about those
+/// instructions.
+#[derive(Clone)]
+pub struct Program {
+    /// A sequence of instructions that represents an NFA.
+    pub insts: Vec<Inst>,
+    /// Pointers to each Match instruction in the sequence.
+    ///
+    /// This is always length 1 unless this program represents a regex set.
+    pub matches: Vec<InstPtr>,
+    /// The ordered sequence of all capture groups extracted from the AST.
+    /// Unnamed groups are `None`.
+    pub captures: Vec<Option<String>>,
+    /// Pointers to all named capture groups into `captures`.
+    pub capture_name_idx: Arc<HashMap<String, usize>>,
+    /// A pointer to the start instruction. This can vary depending on how
+    /// the program was compiled. For example, programs for use with the DFA
+    /// engine have a `.*?` inserted at the beginning of unanchored regular
+    /// expressions. The actual starting point of the program is after the
+    /// `.*?`.
+    pub start: InstPtr,
+    /// A set of equivalence classes for discriminating bytes in the compiled
+    /// program.
+    pub byte_classes: Vec<u8>,
+    /// When true, this program can only match valid UTF-8.
+    pub only_utf8: bool,
+    /// When true, this program uses byte range instructions instead of Unicode
+    /// range instructions.
+    pub is_bytes: bool,
+    /// When true, the program is compiled for DFA matching. For example, this
+    /// implies `is_bytes` and also inserts a preceding `.*?` for unanchored
+    /// regexes.
+    pub is_dfa: bool,
+    /// When true, the program matches text in reverse (for use only in the
+    /// DFA).
+    pub is_reverse: bool,
+    /// Whether the regex must match from the start of the input.
+    pub is_anchored_start: bool,
+    /// Whether the regex must match at the end of the input.
+    pub is_anchored_end: bool,
+    /// Whether this program contains a Unicode word boundary instruction.
+    pub has_unicode_word_boundary: bool,
+    /// A possibly empty machine for very quickly matching prefix literals.
+    pub prefixes: LiteralSearcher,
+    /// A limit on the size of the cache that the DFA is allowed to use while
+    /// matching.
+    ///
+    /// The cache limit specifies approximately how much space we're willing to
+    /// give to the state cache. Once the state cache exceeds the size, it is
+    /// wiped and all states must be re-computed.
+    ///
+    /// Note that this value does not impact correctness. It can be set to 0
+    /// and the DFA will run just fine. (It will only ever store exactly one
+    /// state in the cache, and will likely run very slowly, but it will work.)
+    ///
+    /// Also note that this limit is *per thread of execution*. That is,
+    /// if the same regex is used to search text across multiple threads
+    /// simultaneously, then the DFA cache is not shared. Instead, copies are
+    /// made.
+    pub dfa_size_limit: usize,
+}
+
+impl Program {
+    /// Creates an empty instruction sequence. Fields are given default
+    /// values.
+    pub fn new() -> Self {
+        Program {
+            insts: vec![],
+            matches: vec![],
+            captures: vec![],
+            capture_name_idx: Arc::new(HashMap::new()),
+            start: 0,
+            byte_classes: vec![0; 256],
+            only_utf8: true,
+            is_bytes: false,
+            is_dfa: false,
+            is_reverse: false,
+            is_anchored_start: false,
+            is_anchored_end: false,
+            has_unicode_word_boundary: false,
+            prefixes: LiteralSearcher::empty(),
+            dfa_size_limit: 2 * (1 << 20),
+        }
+    }
+
+    /// If pc is an index to a no-op instruction (like Save), then return the
+    /// next pc that is not a no-op instruction.
+    pub fn skip(&self, mut pc: usize) -> usize {
+        loop {
+            match self[pc] {
+                Inst::Save(ref i) => pc = i.goto,
+                _ => return pc,
+            }
+        }
+    }
+
+    /// Return true if and only if an execution engine at instruction `pc` will
+    /// always lead to a match.
+    pub fn leads_to_match(&self, pc: usize) -> bool {
+        if self.matches.len() > 1 {
+            // If we have a regex set, then we have more than one ending
+            // state, so leading to one of those states is generally
+            // meaningless.
+            return false;
+        }
+        match self[self.skip(pc)] {
+            Inst::Match(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if the current configuration demands that an implicit
+    /// `.*?` be prepended to the instruction sequence.
+    pub fn needs_dotstar(&self) -> bool {
+        self.is_dfa && !self.is_reverse && !self.is_anchored_start
+    }
+
+    /// Returns true if this program uses Byte instructions instead of
+    /// Char/Range instructions.
+    pub fn uses_bytes(&self) -> bool {
+        self.is_bytes || self.is_dfa
+    }
+
+    /// Returns true if this program exclusively matches valid UTF-8 bytes.
+    ///
+    /// That is, if an invalid UTF-8 byte is seen, then no match is possible.
+    pub fn only_utf8(&self) -> bool {
+        self.only_utf8
+    }
+
+    /// Return the approximate heap usage of this instruction sequence in
+    /// bytes.
+    pub fn approximate_size(&self) -> usize {
+        // The only instruction that uses heap space is Ranges (for
+        // Unicode codepoint programs) to store non-overlapping codepoint
+        // ranges. To keep this operation constant time, we ignore them.
+        (self.len() * mem::size_of::<Inst>())
+            + (self.matches.len() * mem::size_of::<InstPtr>())
+            + (self.captures.len() * mem::size_of::<Option<String>>())
+            + (self.capture_name_idx.len()
+                * (mem::size_of::<String>() + mem::size_of::<usize>()))
+            + (self.byte_classes.len() * mem::size_of::<u8>())
+            + self.prefixes.approximate_size()
+    }
+}
+
+impl Deref for Program {
+    type Target = [Inst];
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn deref(&self) -> &Self::Target {
+        &*self.insts
+    }
+}
+
+impl fmt::Debug for Program {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Inst::*;
+
+        fn with_goto(cur: usize, goto: usize, fmtd: String) -> String {
+            if goto == cur + 1 {
+                fmtd
+            } else {
+                format!("{} (goto: {})", fmtd, goto)
+            }
+        }
+
+        fn visible_byte(b: u8) -> String {
+            use std::ascii::escape_default;
+            let escaped = escape_default(b).collect::<Vec<u8>>();
+            String::from_utf8_lossy(&escaped).into_owned()
+        }
+
+        for (pc, inst) in self.iter().enumerate() {
+            match *inst {
+                Match(slot) => write!(f, "{:04} Match({:?})", pc, slot)?,
+                Save(ref inst) => {
+                    let s = format!("{:04} Save({})", pc, inst.slot);
+                    write!(f, "{}", with_goto(pc, inst.goto, s))?;
+                }
+                Split(ref inst) => {
+                    write!(
+                        f,
+                        "{:04} Split({}, {})",
+                        pc, inst.goto1, inst.goto2
+                    )?;
+                }
+                EmptyLook(ref inst) => {
+                    let s = format!("{:?}", inst.look);
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
+                }
+                Char(ref inst) => {
+                    let s = format!("{:?}", inst.c);
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
+                }
+                Ranges(ref inst) => {
+                    let ranges = inst
+                        .ranges
+                        .iter()
+                        .map(|r| format!("{:?}-{:?}", r.0, r.1))
+                        .collect::<Vec<String>>()
+                        .join(", ");
+                    write!(
+                        f,
+                        "{:04} {}",
+                        pc,
+                        with_goto(pc, inst.goto, ranges)
+                    )?;
+                }
+                Bytes(ref inst) => {
+                    let s = format!(
+                        "Bytes({}, {})",
+                        visible_byte(inst.start),
+                        visible_byte(inst.end)
+                    );
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
+                }
+            }
+            if pc == self.start {
+                write!(f, " (start)")?;
+            }
+            write!(f, "\n")?;
+        }
+        Ok(())
+    }
+}
+
+impl<'a> IntoIterator for &'a Program {
+    type Item = &'a Inst;
+    type IntoIter = slice::Iter<'a, Inst>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// Inst is an instruction code in a Regex program.
+///
+/// Regrettably, a regex program either contains Unicode codepoint
+/// instructions (Char and Ranges) or it contains byte instructions (Bytes).
+/// A regex program can never contain both.
+///
+/// It would be worth investigating splitting this into two distinct types and
+/// then figuring out how to make the matching engines polymorphic over those
+/// types without sacrificing performance.
+///
+/// Other than the benefit of moving invariants into the type system, another
+/// benefit is the decreased size. If we remove the `Char` and `Ranges`
+/// instructions from the `Inst` enum, then its size shrinks from 40 bytes to
+/// 24 bytes. (This is because of the removal of a `Vec` in the `Ranges`
+/// variant.) Given that byte-based machines are typically much bigger than
+/// their Unicode analogues (because they decode UTF-8 directly in the
+/// program), this ends up being a pretty significant savings.
+#[derive(Clone, Debug)]
+pub enum Inst {
+    /// Match indicates that the program has reached a match state.
+    ///
+    /// The number in the match corresponds to the Nth logical regular
+    /// expression in this program. This index is always 0 for normal regex
+    /// programs. Values greater than 0 appear when compiling regex sets, and
+    /// each match instruction gets its own unique value. The value corresponds
+    /// to the Nth regex in the set.
+    Match(usize),
+    /// Save causes the program to save the current location of the input in
+    /// the slot indicated by InstSave.
+    Save(InstSave),
+    /// Split causes the program to diverge to one of two paths in the
+    /// program, preferring goto1 in InstSplit.
+    Split(InstSplit),
+    /// EmptyLook represents a zero-width assertion in a regex program. A
+    /// zero-width assertion does not consume any of the input text.
+    EmptyLook(InstEmptyLook),
+    /// Char requires the regex program to match the character in InstChar at
+    /// the current position in the input.
+    Char(InstChar),
+    /// Ranges requires the regex program to match the character at the current
+    /// position in the input with one of the ranges specified in InstRanges.
+    Ranges(InstRanges),
+    /// Bytes is like Ranges, except it expresses a single byte range. It is
+    /// used in conjunction with Split instructions to implement multi-byte
+    /// character classes.
+    Bytes(InstBytes),
+}
+
+impl Inst {
+    /// Returns true if and only if this is a match instruction.
+    pub fn is_match(&self) -> bool {
+        match *self {
+            Inst::Match(_) => true,
+            _ => false,
+        }
+    }
+}
+
+/// Representation of the Save instruction.
+#[derive(Clone, Debug)]
+pub struct InstSave {
+    /// The next location to execute in the program.
+    pub goto: InstPtr,
+    /// The capture slot (there are two slots for every capture in a regex,
+    /// including the zeroth capture for the entire match).
+    pub slot: usize,
+}
+
+/// Representation of the Split instruction.
+#[derive(Clone, Debug)]
+pub struct InstSplit {
+    /// The first instruction to try. A match resulting from following goto1
+    /// has precedence over a match resulting from following goto2.
+    pub goto1: InstPtr,
+    /// The second instruction to try. A match resulting from following goto1
+    /// has precedence over a match resulting from following goto2.
+    pub goto2: InstPtr,
+}
+
+/// Representation of the `EmptyLook` instruction.
+#[derive(Clone, Debug)]
+pub struct InstEmptyLook {
+    /// The next location to execute in the program if this instruction
+    /// succeeds.
+    pub goto: InstPtr,
+    /// The type of zero-width assertion to check.
+    pub look: EmptyLook,
+}
+
+/// The set of zero-width match instructions.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum EmptyLook {
+    /// Start of line or input.
+    StartLine,
+    /// End of line or input.
+    EndLine,
+    /// Start of input.
+    StartText,
+    /// End of input.
+    EndText,
+    /// Word character on one side and non-word character on other.
+    WordBoundary,
+    /// Word character on both sides or non-word character on both sides.
+    NotWordBoundary,
+    /// ASCII word boundary.
+    WordBoundaryAscii,
+    /// Not ASCII word boundary.
+    NotWordBoundaryAscii,
+}
+
+/// Representation of the Char instruction.
+#[derive(Clone, Debug)]
+pub struct InstChar {
+    /// The next location to execute in the program if this instruction
+    /// succeeds.
+    pub goto: InstPtr,
+    /// The character to test.
+    pub c: char,
+}
+
+/// Representation of the Ranges instruction.
+#[derive(Clone, Debug)]
+pub struct InstRanges {
+    /// The next location to execute in the program if this instruction
+    /// succeeds.
+    pub goto: InstPtr,
+    /// The set of Unicode scalar value ranges to test.
+    pub ranges: Vec<(char, char)>,
+}
+
+impl InstRanges {
+    /// Tests whether the given input character matches this instruction.
+    pub fn matches(&self, c: Char) -> bool {
+        // This speeds up the `match_class_unicode` benchmark by checking
+        // some common cases quickly without binary search. e.g., Matching
+        // a Unicode class on predominantly ASCII text.
+        for r in self.ranges.iter().take(4) {
+            if c < r.0 {
+                return false;
+            }
+            if c <= r.1 {
+                return true;
+            }
+        }
+        self.ranges
+            .binary_search_by(|r| {
+                if r.1 < c {
+                    Ordering::Less
+                } else if r.0 > c {
+                    Ordering::Greater
+                } else {
+                    Ordering::Equal
+                }
+            })
+            .is_ok()
+    }
+
+    /// Return the number of distinct characters represented by all of the
+    /// ranges.
+    pub fn num_chars(&self) -> usize {
+        self.ranges
+            .iter()
+            .map(|&(s, e)| 1 + (e as u32) - (s as u32))
+            .sum::<u32>() as usize
+    }
+}
+
+/// Representation of the Bytes instruction.
+#[derive(Clone, Debug)]
+pub struct InstBytes {
+    /// The next location to execute in the program if this instruction
+    /// succeeds.
+    pub goto: InstPtr,
+    /// The start (inclusive) of this byte range.
+    pub start: u8,
+    /// The end (inclusive) of this byte range.
+    pub end: u8,
+}
+
+impl InstBytes {
+    /// Returns true if and only if the given byte is in this range.
+    pub fn matches(&self, byte: u8) -> bool {
+        self.start <= byte && byte <= self.end
+    }
+}
diff --git a/src/re_builder.rs b/src/re_builder.rs
new file mode 100644
index 0000000..3fef99d
--- /dev/null
+++ b/src/re_builder.rs
@@ -0,0 +1,419 @@
+/// The set of user configurable options for compiling zero or more regexes.
+#[derive(Clone, Debug)]
+#[allow(missing_docs)]
+pub struct RegexOptions {
+    pub pats: Vec<String>,
+    pub size_limit: usize,
+    pub dfa_size_limit: usize,
+    pub nest_limit: u32,
+    pub case_insensitive: bool,
+    pub multi_line: bool,
+    pub dot_matches_new_line: bool,
+    pub swap_greed: bool,
+    pub ignore_whitespace: bool,
+    pub unicode: bool,
+    pub octal: bool,
+}
+
+impl Default for RegexOptions {
+    fn default() -> Self {
+        RegexOptions {
+            pats: vec![],
+            size_limit: 10 * (1 << 20),
+            dfa_size_limit: 2 * (1 << 20),
+            nest_limit: 250,
+            case_insensitive: false,
+            multi_line: false,
+            dot_matches_new_line: false,
+            swap_greed: false,
+            ignore_whitespace: false,
+            unicode: true,
+            octal: false,
+        }
+    }
+}
+
+macro_rules! define_builder {
+    ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
+        pub mod $name {
+            use super::RegexOptions;
+            use error::Error;
+            use exec::ExecBuilder;
+
+            use $regex_mod::Regex;
+
+            /// A configurable builder for a regular expression.
+            ///
+            /// A builder can be used to configure how the regex is built, for example, by
+            /// setting the default flags (which can be overridden in the expression
+            /// itself) or setting various limits.
+            pub struct RegexBuilder(RegexOptions);
+
+            impl RegexBuilder {
+                /// Create a new regular expression builder with the given pattern.
+                ///
+                /// If the pattern is invalid, then an error will be returned when
+                /// `build` is called.
+                pub fn new(pattern: &str) -> RegexBuilder {
+                    let mut builder = RegexBuilder(RegexOptions::default());
+                    builder.0.pats.push(pattern.to_owned());
+                    builder
+                }
+
+                /// Consume the builder and compile the regular expression.
+                ///
+                /// Note that calling `as_str` on the resulting `Regex` will produce the
+                /// pattern given to `new` verbatim. Notably, it will not incorporate any
+                /// of the flags set on this builder.
+                pub fn build(&self) -> Result<Regex, Error> {
+                    ExecBuilder::new_options(self.0.clone())
+                        .only_utf8($only_utf8)
+                        .build()
+                        .map(Regex::from)
+                }
+
+                /// Set the value for the case insensitive (`i`) flag.
+                ///
+                /// When enabled, letters in the pattern will match both upper case and
+                /// lower case variants.
+                pub fn case_insensitive(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexBuilder {
+                    self.0.case_insensitive = yes;
+                    self
+                }
+
+                /// Set the value for the multi-line matching (`m`) flag.
+                ///
+                /// When enabled, `^` matches the beginning of lines and `$` matches the
+                /// end of lines.
+                ///
+                /// By default, they match beginning/end of the input.
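+                ///
+                /// A brief sketch (assuming the crate-level `RegexBuilder`
+                /// re-export, which is not defined in this file):
+                ///
+                /// ```rust
+                /// # extern crate regex; use regex::RegexBuilder;
+                /// # fn main() {
+                /// let re = RegexBuilder::new(r"^bar").multi_line(true).build().unwrap();
+                /// // With `m` enabled, `^` also matches at the start of each line.
+                /// assert!(re.is_match("foo\nbar"));
+                /// # }
+                /// ```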
+                pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
+                    self.0.multi_line = yes;
+                    self
+                }
+
+                /// Set the value for the any character (`s`) flag, wherein `.` matches
+                /// anything when `s` is set and matches anything except for a new line
+                /// when it is not set (the default).
+                ///
+                /// N.B. "matches anything" means "any byte" when Unicode is disabled and
+                /// means "any valid UTF-8 encoding of any Unicode scalar value" when
+                /// Unicode is enabled.
+                pub fn dot_matches_new_line(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexBuilder {
+                    self.0.dot_matches_new_line = yes;
+                    self
+                }
+
+                /// Set the value for the greedy swap (`U`) flag.
+                ///
+                /// When enabled, a pattern like `a*` is lazy (tries to find the
+                /// shortest match) and `a*?` is greedy (tries to find the longest match).
+                ///
+                /// By default, `a*` is greedy and `a*?` is lazy.
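+                ///
+                /// A minimal sketch (assuming the crate-level `RegexBuilder`
+                /// re-export and the Unicode `Match::as_str` accessor):
+                ///
+                /// ```rust
+                /// # extern crate regex; use regex::RegexBuilder;
+                /// # fn main() {
+                /// let re = RegexBuilder::new(r"a+").swap_greed(true).build().unwrap();
+                /// // With the swap, `a+` behaves like `a+?` and prefers the shortest match.
+                /// assert_eq!("a", re.find("aaa").unwrap().as_str());
+                /// # }
+                /// ```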
+                pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
+                    self.0.swap_greed = yes;
+                    self
+                }
+
+                /// Set the value for the ignore whitespace (`x`) flag.
+                ///
+                /// When enabled, whitespace such as new lines and spaces will be ignored
+                /// between expressions of the pattern, and `#` can be used to start a
+                /// comment until the next new line.
+                pub fn ignore_whitespace(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexBuilder {
+                    self.0.ignore_whitespace = yes;
+                    self
+                }
+
+                /// Set the value for the Unicode (`u`) flag.
+                ///
+                /// Enabled by default. When disabled, character classes such as `\w` only
+                /// match ASCII word characters instead of all Unicode word characters.
+                pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
+                    self.0.unicode = yes;
+                    self
+                }
+
+                /// Whether to support octal syntax or not.
+                ///
+                /// Octal syntax is a little-known way of uttering Unicode codepoints in
+                /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+                /// `\141` are all equivalent regular expressions, where the last example
+                /// shows octal syntax.
+                ///
+                /// While supporting octal syntax isn't in and of itself a problem, it does
+                /// make good error messages harder. That is, in PCRE based regex engines,
+                /// syntax like `\0` invokes a backreference, which is explicitly
+                /// unsupported in Rust's regex engine. However, many users expect it to
+                /// be supported. Therefore, when octal support is disabled, the error
+                /// message will explicitly mention that backreferences aren't supported.
+                ///
+                /// Octal syntax is disabled by default.
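+                ///
+                /// A small sketch of the octal escape described above (assuming
+                /// the crate-level `RegexBuilder` re-export):
+                ///
+                /// ```rust
+                /// # extern crate regex; use regex::RegexBuilder;
+                /// # fn main() {
+                /// // With octal syntax enabled, `\141` denotes U+0061 ('a').
+                /// let re = RegexBuilder::new(r"\141").octal(true).build().unwrap();
+                /// assert!(re.is_match("a"));
+                /// # }
+                /// ```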
+                pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
+                    self.0.octal = yes;
+                    self
+                }
+
+                /// Set the approximate size limit of the compiled regular expression.
+                ///
+                /// This roughly corresponds to the number of bytes occupied by a single
+                /// compiled program. If the program exceeds this number, then a
+                /// compilation error is returned.
+                pub fn size_limit(
+                    &mut self,
+                    limit: usize,
+                ) -> &mut RegexBuilder {
+                    self.0.size_limit = limit;
+                    self
+                }
+
+                /// Set the approximate size of the cache used by the DFA.
+                ///
+                /// This roughly corresponds to the number of bytes that the DFA will
+                /// use while searching.
+                ///
+                /// Note that this is a *per thread* limit. There is no way to set a global
+                /// limit. In particular, if a regex is used from multiple threads
+                /// simultaneously, then each thread may use up to the number of bytes
+                /// specified here.
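+                ///
+                /// For example (the exact value is arbitrary and only
+                /// illustrative; it assumes the crate-level `RegexBuilder`
+                /// re-export):
+                ///
+                /// ```rust
+                /// # extern crate regex; use regex::RegexBuilder;
+                /// # fn main() {
+                /// // Allow the lazy DFA roughly 1 MiB of cache space per thread.
+                /// let re = RegexBuilder::new(r"\w{5}")
+                ///     .dfa_size_limit(1 << 20)
+                ///     .build()
+                ///     .unwrap();
+                /// assert!(re.is_match("hello"));
+                /// # }
+                /// ```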
+                pub fn dfa_size_limit(
+                    &mut self,
+                    limit: usize,
+                ) -> &mut RegexBuilder {
+                    self.0.dfa_size_limit = limit;
+                    self
+                }
+
+                /// Set the nesting limit for this parser.
+                ///
+                /// The nesting limit controls how deep the abstract syntax tree is allowed
+                /// to be. If the AST exceeds the given limit (e.g., with too many nested
+                /// groups), then an error is returned by the parser.
+                ///
+                /// The purpose of this limit is to act as a heuristic to prevent stack
+                /// overflow for consumers that do structural induction on an `Ast` using
+                /// explicit recursion. While this crate never does this (instead using
+                /// constant stack space and moving the call stack to the heap), other
+                /// crates may.
+                ///
+                /// This limit is not checked until the entire Ast is parsed. Therefore,
+                /// if callers want to put a limit on the amount of heap space used, then
+                /// they should impose a limit on the length, in bytes, of the concrete
+                /// pattern string. In particular, this is viable since this parser
+                /// implementation will limit itself to heap space proportional to the
+                /// length of the pattern string.
+                ///
+                /// Note that a nest limit of `0` will return a nest limit error for most
+                /// patterns but not all. For example, a nest limit of `0` permits `a` but
+                /// not `ab`, since `ab` requires a concatenation, which results in a nest
+                /// depth of `1`. In general, a nest limit is not something that manifests
+                /// in an obvious way in the concrete syntax, therefore, it should not be
+                /// used in a granular way.
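+                ///
+                /// A sketch of the `a` vs. `ab` distinction described above
+                /// (assuming the crate-level `RegexBuilder` re-export):
+                ///
+                /// ```rust
+                /// # extern crate regex; use regex::RegexBuilder;
+                /// # fn main() {
+                /// // `a` has no nesting, so a nest limit of 0 accepts it...
+                /// assert!(RegexBuilder::new("a").nest_limit(0).build().is_ok());
+                /// // ...but `ab` needs a concatenation (depth 1) and is rejected.
+                /// assert!(RegexBuilder::new("ab").nest_limit(0).build().is_err());
+                /// # }
+                /// ```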
+                pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
+                    self.0.nest_limit = limit;
+                    self
+                }
+            }
+        }
+    };
+}
+
+define_builder!(bytes, re_bytes, false);
+define_builder!(unicode, re_unicode, true);
+
+macro_rules! define_set_builder {
+    ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
+        pub mod $name {
+            use super::RegexOptions;
+            use error::Error;
+            use exec::ExecBuilder;
+
+            use re_set::$regex_mod::RegexSet;
+
+            /// A configurable builder for a set of regular expressions.
+            ///
+            /// A builder can be used to configure how the regexes are built, for example,
+            /// by setting the default flags (which can be overridden in the expression
+            /// itself) or setting various limits.
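+            ///
+            /// A brief sketch (assuming the crate-level `RegexSetBuilder`
+            /// re-export):
+            ///
+            /// ```rust
+            /// # extern crate regex; use regex::RegexSetBuilder;
+            /// # fn main() {
+            /// let set = RegexSetBuilder::new(&[r"foo\d+", r"BAR"])
+            ///     .case_insensitive(true)
+            ///     .build()
+            ///     .unwrap();
+            /// assert!(set.is_match("bar"));
+            /// # }
+            /// ```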
+            pub struct RegexSetBuilder(RegexOptions);
+
+            impl RegexSetBuilder {
+                /// Create a new regular expression set builder with the given patterns.
+                ///
+                /// If any of the patterns are invalid, then an error will be returned
+                /// when `build` is called.
+                pub fn new<I, S>(patterns: I) -> RegexSetBuilder
+                where
+                    S: AsRef<str>,
+                    I: IntoIterator<Item = S>,
+                {
+                    let mut builder = RegexSetBuilder(RegexOptions::default());
+                    for pat in patterns {
+                        builder.0.pats.push(pat.as_ref().to_owned());
+                    }
+                    builder
+                }
+
+                /// Consume the builder and compile the regular expressions into a set.
+                pub fn build(&self) -> Result<RegexSet, Error> {
+                    ExecBuilder::new_options(self.0.clone())
+                        .only_utf8($only_utf8)
+                        .build()
+                        .map(RegexSet::from)
+                }
+
+                /// Set the value for the case insensitive (`i`) flag.
+                pub fn case_insensitive(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexSetBuilder {
+                    self.0.case_insensitive = yes;
+                    self
+                }
+
+                /// Set the value for the multi-line matching (`m`) flag.
+                pub fn multi_line(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexSetBuilder {
+                    self.0.multi_line = yes;
+                    self
+                }
+
+                /// Set the value for the any character (`s`) flag, wherein `.` matches
+                /// anything when `s` is set and matches anything except for a new line
+                /// when it is not set (the default).
+                ///
+                /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet`
+                /// expressions and means "any Unicode scalar value" for `regex::RegexSet`
+                /// expressions.
+                pub fn dot_matches_new_line(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexSetBuilder {
+                    self.0.dot_matches_new_line = yes;
+                    self
+                }
+
+                /// Set the value for the greedy swap (`U`) flag.
+                pub fn swap_greed(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexSetBuilder {
+                    self.0.swap_greed = yes;
+                    self
+                }
+
+                /// Set the value for the ignore whitespace (`x`) flag.
+                pub fn ignore_whitespace(
+                    &mut self,
+                    yes: bool,
+                ) -> &mut RegexSetBuilder {
+                    self.0.ignore_whitespace = yes;
+                    self
+                }
+
+                /// Set the value for the Unicode (`u`) flag.
+                pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
+                    self.0.unicode = yes;
+                    self
+                }
+
+                /// Whether to support octal syntax or not.
+                ///
+                /// Octal syntax is a little-known way of uttering Unicode codepoints in
+                /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+                /// `\141` are all equivalent regular expressions, where the last example
+                /// shows octal syntax.
+                ///
+                /// While supporting octal syntax isn't in and of itself a problem, it does
+                /// make good error messages harder. That is, in PCRE based regex engines,
+                /// syntax like `\0` invokes a backreference, which is explicitly
+                /// unsupported in Rust's regex engine. However, many users expect it to
+                /// be supported. Therefore, when octal support is disabled, the error
+                /// message will explicitly mention that backreferences aren't supported.
+                ///
+                /// Octal syntax is disabled by default.
+                pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
+                    self.0.octal = yes;
+                    self
+                }
+
+                /// Set the approximate size limit of the compiled regular expression.
+                ///
+                /// This roughly corresponds to the number of bytes occupied by a single
+                /// compiled program. If the program exceeds this number, then a
+                /// compilation error is returned.
+                pub fn size_limit(
+                    &mut self,
+                    limit: usize,
+                ) -> &mut RegexSetBuilder {
+                    self.0.size_limit = limit;
+                    self
+                }
+
+                /// Set the approximate size of the cache used by the DFA.
+                ///
+                /// This roughly corresponds to the number of bytes that the DFA will
+                /// use while searching.
+                ///
+                /// Note that this is a *per thread* limit. There is no way to set a global
+                /// limit. In particular, if a regex is used from multiple threads
+                /// simultaneously, then each thread may use up to the number of bytes
+                /// specified here.
+                pub fn dfa_size_limit(
+                    &mut self,
+                    limit: usize,
+                ) -> &mut RegexSetBuilder {
+                    self.0.dfa_size_limit = limit;
+                    self
+                }
+
+                /// Set the nesting limit for this parser.
+                ///
+                /// The nesting limit controls how deep the abstract syntax tree is allowed
+                /// to be. If the AST exceeds the given limit (e.g., with too many nested
+                /// groups), then an error is returned by the parser.
+                ///
+                /// The purpose of this limit is to act as a heuristic to prevent stack
+                /// overflow for consumers that do structural induction on an `Ast` using
+                /// explicit recursion. While this crate never does this (instead using
+                /// constant stack space and moving the call stack to the heap), other
+                /// crates may.
+                ///
+                /// This limit is not checked until the entire Ast is parsed. Therefore,
+                /// if callers want to put a limit on the amount of heap space used, then
+                /// they should impose a limit on the length, in bytes, of the concrete
+                /// pattern string. In particular, this is viable since this parser
+                /// implementation will limit itself to heap space proportional to the
+                /// length of the pattern string.
+                ///
+                /// Note that a nest limit of `0` will return a nest limit error for most
+                /// patterns but not all. For example, a nest limit of `0` permits `a` but
+                /// not `ab`, since `ab` requires a concatenation, which results in a nest
+                /// depth of `1`. In general, a nest limit is not something that manifests
+                /// in an obvious way in the concrete syntax, therefore, it should not be
+                /// used in a granular way.
+                pub fn nest_limit(
+                    &mut self,
+                    limit: u32,
+                ) -> &mut RegexSetBuilder {
+                    self.0.nest_limit = limit;
+                    self
+                }
+            }
+        }
+    };
+}
+
+define_set_builder!(set_bytes, bytes, false);
+define_set_builder!(set_unicode, unicode, true);
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
new file mode 100644
index 0000000..69f0b33
--- /dev/null
+++ b/src/re_bytes.rs
@@ -0,0 +1,1179 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::fmt;
+use std::ops::{Index, Range};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use find_byte::find_byte;
+
+use error::Error;
+use exec::{Exec, ExecNoSync};
+use expand::expand_bytes;
+use re_builder::bytes::RegexBuilder;
+use re_trait::{self, RegularExpression, SubCapturesPosIter};
+
+/// Match represents a single match of a regex in a haystack.
+///
+/// The lifetime parameter `'t` refers to the lifetime of the matched text.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct Match<'t> {
+    text: &'t [u8],
+    start: usize,
+    end: usize,
+}
+
+impl<'t> Match<'t> {
+    /// Returns the starting byte offset of the match in the haystack.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// Returns the ending byte offset of the match in the haystack.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.end
+    }
+
+    /// Returns the range over the starting and ending byte offsets of the
+    /// match in the haystack.
+    #[inline]
+    pub fn range(&self) -> Range<usize> {
+        self.start..self.end
+    }
+
+    /// Returns the matched text.
+    #[inline]
+    pub fn as_bytes(&self) -> &'t [u8] {
+        &self.text[self.range()]
+    }
+
+    /// Creates a new match from the given haystack and byte offsets.
+    #[inline]
+    fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> {
+        Match { text: haystack, start: start, end: end }
+    }
+}
+
+impl<'t> From<Match<'t>> for Range<usize> {
+    fn from(m: Match<'t>) -> Range<usize> {
+        m.range()
+    }
+}
+
+/// A compiled regular expression for matching arbitrary bytes.
+///
+/// It can be used to search, split or replace text. All searching is done with
+/// an implicit `.*?` at the beginning and end of an expression. To force an
+/// expression to match the whole string (or a prefix or a suffix), you must
+/// use an anchor like `^` or `$` (or `\A` and `\z`).
+///
+/// Like the `Regex` type in the parent module, matches with this regex return
+/// byte offsets into the search text. **Unlike** the parent `Regex` type,
+/// these byte offsets may not correspond to UTF-8 sequence boundaries since
+/// the regexes in this module can match arbitrary bytes.
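+///
+/// # Example
+///
+/// A small sketch of a match over bytes that are not valid UTF-8, using the
+/// `(?-u)` flag to allow matching arbitrary bytes:
+///
+/// ```rust
+/// # use regex::bytes::Regex;
+/// let re = Regex::new(r"(?-u)\xFF+").unwrap();
+/// let mat = re.find(b"ab\xFF\xFFcd").unwrap();
+/// assert_eq!((mat.start(), mat.end()), (2, 4));
+/// ```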
+#[derive(Clone)]
+pub struct Regex(Exec);
+
+impl fmt::Display for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl fmt::Debug for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(self, f)
+    }
+}
+
+/// A constructor for Regex from an Exec.
+///
+/// This is hidden because Exec isn't actually part of the public API.
+#[doc(hidden)]
+impl From<Exec> for Regex {
+    fn from(exec: Exec) -> Regex {
+        Regex(exec)
+    }
+}
+
+impl FromStr for Regex {
+    type Err = Error;
+
+    /// Attempts to parse a string into a regular expression
+    fn from_str(s: &str) -> Result<Regex, Error> {
+        Regex::new(s)
+    }
+}
+
+/// Core regular expression methods.
+impl Regex {
+    /// Compiles a regular expression. Once compiled, it can be used repeatedly
+    /// to search, split or replace text in a string.
+    ///
+    /// If an invalid expression is given, then an error is returned.
+    pub fn new(re: &str) -> Result<Regex, Error> {
+        RegexBuilder::new(re).build()
+    }
+
+    /// Returns true if and only if the regex matches the string given.
+    ///
+    /// It is recommended to use this method if all you need to do is test
+    /// a match, since the underlying matching engine may be able to do less
+    /// work.
+    ///
+    /// # Example
+    ///
+    /// Test if some text contains at least one word with exactly 13 ASCII word
+    /// bytes:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let text = b"I categorically deny having triskaidekaphobia.";
+    /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
+    /// # }
+    /// ```
+    pub fn is_match(&self, text: &[u8]) -> bool {
+        self.is_match_at(text, 0)
+    }
+
+    /// Returns the start and end byte range of the leftmost-first match in
+    /// `text`. If no match exists, then `None` is returned.
+    ///
+    /// Note that this should only be used if you want to discover the position
+    /// of the match. Testing the existence of a match is faster if you use
+    /// `is_match`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of the first word with exactly 13
+    /// ASCII word bytes:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let text = b"I categorically deny having triskaidekaphobia.";
+    /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
+    /// assert_eq!((mat.start(), mat.end()), (2, 15));
+    /// # }
+    /// ```
+    pub fn find<'t>(&self, text: &'t [u8]) -> Option<Match<'t>> {
+        self.find_at(text, 0)
+    }
+
+    /// Returns an iterator for each successive non-overlapping match in
+    /// `text`, returning the start and end byte indices with respect to
+    /// `text`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of every word with exactly 13 ASCII
+    /// word bytes:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let text = b"Retroactively relinquishing remunerations is reprehensible.";
+    /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
+    ///     println!("{:?}", mat);
+    /// }
+    /// # }
+    /// ```
+    pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> {
+        Matches(self.0.searcher().find_iter(text))
+    }
+
+    /// Returns the capture groups corresponding to the leftmost-first
+    /// match in `text`. Capture group `0` always corresponds to the entire
+    /// match. If no match is found, then `None` is returned.
+    ///
+    /// You should only use `captures` if you need access to the location of
+    /// capturing group matches. Otherwise, `find` is faster for discovering
+    /// the location of the overall match.
+    ///
+    /// # Examples
+    ///
+    /// Say you have some text with movie names and their release years,
+    /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
+    /// looking like that, while also extracting the movie name and its release
+    /// year separately.
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
+    /// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]);
+    /// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]);
+    /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]);
+    /// // You can also access the groups by index using the Index notation.
+    /// // Note that this will panic on an invalid index.
+    /// assert_eq!(&caps[1], b"Citizen Kane");
+    /// assert_eq!(&caps[2], b"1941");
+    /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
+    /// # }
+    /// ```
+    ///
+    /// Note that the full match is at capture group `0`. Each subsequent
+    /// capture group is indexed by the order of its opening `(`.
+    ///
+    /// We can make this example a bit clearer by using *named* capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane");
+    /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941");
+    /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]);
+    /// // You can also access the groups by name using the Index notation.
+    /// // Note that this will panic on an invalid group name.
+    /// assert_eq!(&caps["title"], b"Citizen Kane");
+    /// assert_eq!(&caps["year"], b"1941");
+    /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
+    ///
+    /// # }
+    /// ```
+    ///
+    /// Here we name the capture groups, which we can access with the `name`
+    /// method or the `Index` notation with a `&str`. Note that the named
+    /// capture groups are still accessible with `get` or the `Index` notation
+    /// with a `usize`.
+    ///
+    /// The `0`th capture group is always unnamed, so it must always be
+    /// accessed with `get(0)` or `[0]`.
+    pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
+        let mut locs = self.capture_locations();
+        self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
+            text: text,
+            locs: locs.0,
+            named_groups: self.0.capture_name_idx().clone(),
+        })
+    }
+
+    /// Returns an iterator over all the non-overlapping capture groups matched
+    /// in `text`. This is operationally the same as `find_iter`, except it
+    /// yields information about capturing group matches.
+    ///
+    /// # Example
+    ///
+    /// We can use this to find all movie titles and their release years in
+    /// some text, where the movie is formatted like "'Title' (xxxx)":
+    ///
+    /// ```rust
+    /// # extern crate regex; use std::str; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
+    /// for caps in re.captures_iter(text) {
+    ///     let title = str::from_utf8(&caps["title"]).unwrap();
+    ///     let year = str::from_utf8(&caps["year"]).unwrap();
+    ///     println!("Movie: {:?}, Released: {:?}", title, year);
+    /// }
+    /// // Output:
+    /// // Movie: Citizen Kane, Released: 1941
+    /// // Movie: The Wizard of Oz, Released: 1939
+    /// // Movie: M, Released: 1931
+    /// # }
+    /// ```
+    pub fn captures_iter<'r, 't>(
+        &'r self,
+        text: &'t [u8],
+    ) -> CaptureMatches<'r, 't> {
+        CaptureMatches(self.0.searcher().captures_iter(text))
+    }
+
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Namely, each element of the iterator corresponds to
+    /// text that *isn't* matched by the regular expression.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"[ \t]+").unwrap();
+    /// let fields: Vec<&[u8]> = re.split(b"a b \t  c\td    e").collect();
+    /// assert_eq!(fields, vec![
+    ///     &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..],
+    /// ]);
+    /// # }
+    /// ```
+    pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> {
+        Split { finder: self.find_iter(text), last: 0 }
+    }
+
+    /// Returns an iterator of at most `limit` substrings of `text` delimited
+    /// by a match of the regular expression. (A `limit` of `0` will return no
+    /// substrings.) Namely, each element of the iterator corresponds to text
+    /// that *isn't* matched by the regular expression. The remainder of the
+    /// string that is not split will be the last element in the iterator.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// Get the first two words in some text:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\W+").unwrap();
+    /// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect();
+    /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]);
+    /// # }
+    /// ```
+    pub fn splitn<'r, 't>(
+        &'r self,
+        text: &'t [u8],
+        limit: usize,
+    ) -> SplitN<'r, 't> {
+        SplitN { splits: self.split(text), n: limit }
+    }
+
+    /// Replaces the leftmost-first match with the replacement provided. The
+    /// replacement can be a regular byte string (where `$N` and `$name` are
+    /// expanded to match capture groups) or a function that takes the matches'
+    /// `Captures` and returns the replaced byte string.
+    ///
+    /// If no match is found, then the byte string is returned unchanged
+    /// (borrowed, without copying).
+    ///
+    /// # Replacement string syntax
+    ///
+    /// All instances of `$name` in the replacement text are replaced with the
+    /// corresponding capture group `name`.
+    ///
+    /// `name` may be an integer corresponding to the index of the
+    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// entire match) or it can be a name (consisting of letters, digits or
+    /// underscores) corresponding to a named capture group.
+    ///
+    /// If `name` isn't a valid capture group (whether the name doesn't exist
+    /// or isn't a valid index), then it is replaced with the empty string.
+    ///
+    /// The longest possible name is used; e.g., `$1a` looks up the capture
+    /// group named `1a` and not the capture group at index `1`. To exert more
+    /// precise control over the name, use braces, e.g., `${1}a`.
+    ///
+    /// To write a literal `$` use `$$`.
+    ///
+    /// # Examples
+    ///
+    /// Note that this function is polymorphic with respect to the replacement.
+    /// In typical usage, this can just be a normal byte string:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new("[^01]+").unwrap();
+    /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]);
+    /// # }
+    /// ```
+    ///
+    /// But anything satisfying the `Replacer` trait will work. For example, a
+    /// closure of type `|&Captures| -> Vec<u8>` provides direct access to the
+    /// captures corresponding to a match. This allows one to access capturing
+    /// group matches easily:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # use regex::bytes::Captures; fn main() {
+    /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
+    ///     let mut replacement = caps[2].to_owned();
+    ///     replacement.push(b' ');
+    ///     replacement.extend(&caps[1]);
+    ///     replacement
+    /// });
+    /// assert_eq!(result, &b"Bruce Springsteen"[..]);
+    /// # }
+    /// ```
+    ///
+    /// But this is a bit cumbersome to use all the time. Instead, a simple
+    /// syntax is supported that expands `$name` into the corresponding capture
+    /// group. Here's the last example, but using this expansion technique
+    /// with named capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]);
+    /// assert_eq!(result, &b"Bruce Springsteen"[..]);
+    /// # }
+    /// ```
+    ///
+    /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
+    /// would produce the same result. To write a literal `$` use `$$`.
+    ///
+    /// Sometimes the replacement string requires use of curly braces to
+    /// delineate a capture group replacement and surrounding literal text.
+    /// For example, if we wanted to join two words together with an
+    /// underscore:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
+    /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
+    /// assert_eq!(result, &b"deep_fried"[..]);
+    /// # }
+    /// ```
+    ///
+    /// Without the curly braces, the capture group name `first_` would be
+    /// used, and since it doesn't exist, it would be replaced with the empty
+    /// string.
+    ///
+    /// Finally, sometimes you just want to replace a literal string with no
+    /// regard for capturing group expansion. This can be done by wrapping a
+    /// byte string with `NoExpand`:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// use regex::bytes::NoExpand;
+    ///
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last"));
+    /// assert_eq!(result, &b"$2 $last"[..]);
+    /// # }
+    /// ```
+    pub fn replace<'t, R: Replacer>(
+        &self,
+        text: &'t [u8],
+        rep: R,
+    ) -> Cow<'t, [u8]> {
+        self.replacen(text, 1, rep)
+    }
+
+    /// Replaces all non-overlapping matches in `text` with the replacement
+    /// provided. This is the same as calling `replacen` with `limit` set to
+    /// `0`.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// capturing group matches in the replacement text.
+    pub fn replace_all<'t, R: Replacer>(
+        &self,
+        text: &'t [u8],
+        rep: R,
+    ) -> Cow<'t, [u8]> {
+        self.replacen(text, 0, rep)
+    }
+
+    /// Replaces at most `limit` non-overlapping matches in `text` with the
+    /// replacement provided. If `limit` is 0, then all non-overlapping matches
+    /// are replaced.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// capturing group matches in the replacement text.
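+    ///
+    /// # Example
+    ///
+    /// A short sketch replacing only the first two matches:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"\d+").unwrap();
+    /// let result = re.replacen(b"1 2 3 4", 2, &b"#"[..]);
+    /// assert_eq!(result, &b"# # 3 4"[..]);
+    /// ```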
+    pub fn replacen<'t, R: Replacer>(
+        &self,
+        text: &'t [u8],
+        limit: usize,
+        mut rep: R,
+    ) -> Cow<'t, [u8]> {
+        if let Some(rep) = rep.no_expansion() {
+            let mut it = self.find_iter(text).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(text);
+            }
+            let mut new = Vec::with_capacity(text.len());
+            let mut last_match = 0;
+            for (i, m) in it {
+                if limit > 0 && i >= limit {
+                    break;
+                }
+                new.extend_from_slice(&text[last_match..m.start()]);
+                new.extend_from_slice(&rep);
+                last_match = m.end();
+            }
+            new.extend_from_slice(&text[last_match..]);
+            return Cow::Owned(new);
+        }
+
+        // The slower path, which we use if the replacement needs access to
+        // capture groups.
+        let mut it = self.captures_iter(text).enumerate().peekable();
+        if it.peek().is_none() {
+            return Cow::Borrowed(text);
+        }
+        let mut new = Vec::with_capacity(text.len());
+        let mut last_match = 0;
+        for (i, cap) in it {
+            if limit > 0 && i >= limit {
+                break;
+            }
+            // unwrap on 0 is OK because captures only reports matches
+            let m = cap.get(0).unwrap();
+            new.extend_from_slice(&text[last_match..m.start()]);
+            rep.replace_append(&cap, &mut new);
+            last_match = m.end();
+        }
+        new.extend_from_slice(&text[last_match..]);
+        Cow::Owned(new)
+    }
+}
+
+/// Advanced or "lower level" search methods.
+impl Regex {
+    /// Returns the end location of a match in the text given.
+    ///
+    /// This method may have the same performance characteristics as
+    /// `is_match`, except it provides an end location for a match. In
+    /// particular, the location returned *may be shorter* than the proper end
+    /// of the leftmost-first match.
+    ///
+    /// # Example
+    ///
+    /// Typically, `a+` would match the entire first sequence of `a` in some
+    /// text, but `shortest_match` can give up as soon as it sees the first
+    /// `a`.
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::bytes::Regex;
+    /// # fn main() {
+    /// let text = b"aaaaa";
+    /// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
+    /// assert_eq!(pos, Some(1));
+    /// # }
+    /// ```
+    pub fn shortest_match(&self, text: &[u8]) -> Option<usize> {
+        self.shortest_match_at(text, 0)
+    }
+
+    /// Returns the same as shortest_match, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn shortest_match_at(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> Option<usize> {
+        self.0.searcher().shortest_match_at(text, start)
+    }
+
+    /// Returns the same as is_match, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn is_match_at(&self, text: &[u8], start: usize) -> bool {
+        self.shortest_match_at(text, start).is_some()
+    }
+
+    /// Returns the same as find, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
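+    ///
+    /// # Example
+    ///
+    /// A small sketch showing how a starting offset differs from slicing the
+    /// haystack before searching:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"\Afoo").unwrap();
+    /// let text = b"barfoo";
+    /// // `\A` still refers to the start of the full haystack, so searching
+    /// // from offset 3 finds nothing...
+    /// assert!(re.find_at(text, 3).is_none());
+    /// // ...while searching a sliced haystack does find a match.
+    /// assert!(re.find(&text[3..]).is_some());
+    /// ```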
+    pub fn find_at<'t>(
+        &self,
+        text: &'t [u8],
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.0
+            .searcher()
+            .find_at(text, start)
+            .map(|(s, e)| Match::new(text, s, e))
+    }
+
+    /// This is like `captures`, but uses
+    /// [`CaptureLocations`](struct.CaptureLocations.html)
+    /// instead of
+    /// [`Captures`](struct.Captures.html) in order to amortize allocations.
+    ///
+    /// To create a `CaptureLocations` value, use the
+    /// `Regex::capture_locations` method.
+    ///
+    /// This returns the overall match if this was successful, which is always
+    /// equivalent to the `0`th capture group.
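+    ///
+    /// # Example
+    ///
+    /// A rough sketch of reusing one `CaptureLocations` buffer across
+    /// searches:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"(\w+) (\w+)").unwrap();
+    /// let mut locs = re.capture_locations();
+    /// let m = re.captures_read(&mut locs, b"hello world").unwrap();
+    /// assert_eq!(m.as_bytes(), &b"hello world"[..]);
+    /// assert_eq!(locs.get(1), Some((0, 5)));
+    /// assert_eq!(locs.get(2), Some((6, 11)));
+    /// ```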
+    pub fn captures_read<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t [u8],
+    ) -> Option<Match<'t>> {
+        self.captures_read_at(locs, text, 0)
+    }
+
+    /// Returns the same as `captures_read`, but starts the search at the given
+    /// offset and populates the capture locations given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn captures_read_at<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t [u8],
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.0
+            .searcher()
+            .captures_read_at(&mut locs.0, text, start)
+            .map(|(s, e)| Match::new(text, s, e))
+    }
+
+    /// An undocumented alias for `captures_read_at`.
+    ///
+    /// The `regex-capi` crate previously used this routine, so to avoid
+    /// breaking that crate, we continue to provide the name as an undocumented
+    /// alias.
+    #[doc(hidden)]
+    pub fn read_captures_at<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t [u8],
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.captures_read_at(locs, text, start)
+    }
+}
+
+/// Auxiliary methods.
+impl Regex {
+    /// Returns the original string of this regex.
+    pub fn as_str(&self) -> &str {
+        &self.0.regex_strings()[0]
+    }
+
+    /// Returns an iterator over the capture names.
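+    ///
+    /// # Example
+    ///
+    /// A small sketch; names are yielded in order of appearance, with `None`
+    /// for unnamed groups (including group `0`):
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"(?P<y>\d{4})-(\d{2})").unwrap();
+    /// let names: Vec<Option<&str>> = re.capture_names().collect();
+    /// assert_eq!(names, vec![None, Some("y"), None]);
+    /// ```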
+    pub fn capture_names(&self) -> CaptureNames {
+        CaptureNames(self.0.capture_names().iter())
+    }
+
+    /// Returns the number of captures.
+    pub fn captures_len(&self) -> usize {
+        self.0.capture_names().len()
+    }
+
+    /// Returns an empty set of capture locations that can be reused in
+    /// multiple calls to `captures_read` or `captures_read_at`.
+    pub fn capture_locations(&self) -> CaptureLocations {
+        CaptureLocations(self.0.searcher().locations())
+    }
+
+    /// An alias for `capture_locations` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate uses this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    pub fn locations(&self) -> CaptureLocations {
+        CaptureLocations(self.0.searcher().locations())
+    }
+}
+
+/// An iterator over all non-overlapping matches for a particular string.
+///
+/// The iterator yields a `Match` value with the starting and ending byte
+/// offsets of each match. The iterator stops when no more matches can be
+/// found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the matched byte string.
+pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);
+
+impl<'r, 't> Iterator for Matches<'r, 't> {
+    type Item = Match<'t>;
+
+    fn next(&mut self) -> Option<Match<'t>> {
+        let text = self.0.text();
+        self.0.next().map(|(s, e)| Match::new(text, s, e))
+    }
+}
+
+/// An iterator that yields all non-overlapping capture groups matching a
+/// particular regular expression.
+///
+/// The iterator stops when no more matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the matched byte string.
+pub struct CaptureMatches<'r, 't>(
+    re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
+);
+
+impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
+    type Item = Captures<'t>;
+
+    fn next(&mut self) -> Option<Captures<'t>> {
+        self.0.next().map(|locs| Captures {
+            text: self.0.text(),
+            locs: locs,
+            named_groups: self.0.regex().capture_name_idx().clone(),
+        })
+    }
+}
+
+/// Yields all substrings delimited by a regular expression match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the byte string being split.
+pub struct Split<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for Split<'r, 't> {
+    type Item = &'t [u8];
+
+    fn next(&mut self) -> Option<&'t [u8]> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last > text.len() {
+                    None
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len() + 1; // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.start()];
+                self.last = m.end();
+                Some(matched)
+            }
+        }
+    }
+}
+
+/// Yields at most `N` substrings delimited by a regular expression match.
+///
+/// The last substring will be whatever remains after splitting.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the byte string being split.
+pub struct SplitN<'r, 't> {
+    splits: Split<'r, 't>,
+    n: usize,
+}
+
+impl<'r, 't> Iterator for SplitN<'r, 't> {
+    type Item = &'t [u8];
+
+    fn next(&mut self) -> Option<&'t [u8]> {
+        if self.n == 0 {
+            return None;
+        }
+
+        self.n -= 1;
+        if self.n > 0 {
+            return self.splits.next();
+        }
+
+        let text = self.splits.finder.0.text();
+        if self.splits.last > text.len() {
+            // We've already returned all substrings.
+            None
+        } else {
+            // self.n == 0, so future calls will return None immediately
+            Some(&text[self.splits.last..])
+        }
+    }
+}
+
+/// An iterator over the names of all possible captures.
+///
+/// `None` indicates an unnamed capture; the first element (capture 0, the
+/// whole matched region) is always unnamed.
+///
+/// `'r` is the lifetime of the compiled regular expression.
+pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
+
+impl<'r> Iterator for CaptureNames<'r> {
+    type Item = Option<&'r str>;
+
+    fn next(&mut self) -> Option<Option<&'r str>> {
+        self.0
+            .next()
+            .as_ref()
+            .map(|slot| slot.as_ref().map(|name| name.as_ref()))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+/// CaptureLocations is a low level representation of the raw offsets of each
+/// submatch.
+///
+/// You can think of this as a lower level
+/// [`Captures`](struct.Captures.html), where this type does not support
+/// named capturing groups directly and it does not borrow the text that these
+/// offsets were matched on.
+///
+/// Primarily, this type is useful when using the lower level `Regex` APIs
+/// such as `captures_read`, which permits amortizing the allocation in which
+/// capture match locations are stored.
+///
+/// In order to build a value of this type, you'll need to call the
+/// `capture_locations` method on the `Regex` being used to execute the search.
+/// The value returned can then be reused in subsequent searches.
+#[derive(Clone, Debug)]
+pub struct CaptureLocations(re_trait::Locations);
+
+/// A type alias for `CaptureLocations` for backwards compatibility.
+///
+/// Previously, we exported `CaptureLocations` as `Locations` in an
+/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
+/// we continue re-exporting the same undocumented API.
+#[doc(hidden)]
+pub type Locations = CaptureLocations;
+
+impl CaptureLocations {
+    /// Returns the start and end positions of the Nth capture group. Returns
+    /// `None` if `i` is not a valid capture group or if the capture group did
+    /// not match anything. The positions returned are *always* byte indices
+    /// with respect to the original string matched.
+    #[inline]
+    pub fn get(&self, i: usize) -> Option<(usize, usize)> {
+        self.0.pos(i)
+    }
+
+    /// Returns the total number of capturing groups.
+    ///
+    /// This is always at least `1` since every regex has at least `1`
+    /// capturing group that corresponds to the entire match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// An alias for the `get` method for backwards compatibility.
+    ///
+    /// Previously, we exported `get` as `pos` in an undocumented API. To
+    /// prevent breaking that code (e.g., in `regex-capi`), we continue
+    /// re-exporting the same undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        self.get(i)
+    }
+}
+
+/// Captures represents a group of captured byte strings for a single match.
+///
+/// The 0th capture always corresponds to the entire match. Each subsequent
+/// index corresponds to the next capture group in the regex. If a capture
+/// group is named, then the matched byte string is *also* available via the
+/// `name` method. (Note that the 0th capture is always unnamed and so must be
+/// accessed with the `get` method.)
+///
+/// Positions returned from a capture group are always byte indices.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct Captures<'t> {
+    text: &'t [u8],
+    locs: re_trait::Locations,
+    named_groups: Arc<HashMap<String, usize>>,
+}
+
+impl<'t> Captures<'t> {
+    /// Returns the match associated with the capture group at index `i`. If
+    /// `i` does not correspond to a capture group, or if the capture group
+    /// did not participate in the match, then `None` is returned.
+    ///
+    /// # Examples
+    ///
+    /// Get the text of the match with a default of an empty string if this
+    /// group didn't participate in the match:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
+    /// let caps = re.captures(b"abc123").unwrap();
+    ///
+    /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
+    /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
+    /// assert_eq!(text1, &b"123"[..]);
+    /// assert_eq!(text2, &b""[..]);
+    /// ```
+    pub fn get(&self, i: usize) -> Option<Match<'t>> {
+        self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e))
+    }
+
+    /// Returns the match for the capture group named `name`. If `name` isn't a
+    /// valid capture group or didn't match anything, then `None` is returned.
+    pub fn name(&self, name: &str) -> Option<Match<'t>> {
+        self.named_groups.get(name).and_then(|&i| self.get(i))
+    }
+
+    /// An iterator that yields all capturing matches in the order in which
+    /// they appear in the regex. If a particular capture group didn't
+    /// participate in the match, then `None` is yielded for that capture.
+    ///
+    /// The first match always corresponds to the overall match of the regex.
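+    ///
+    /// # Example
+    ///
+    /// A brief sketch showing `None` for a group that did not participate:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"(\d+)([a-z]+)?").unwrap();
+    /// let caps = re.captures(b"42").unwrap();
+    /// let texts: Vec<Option<&[u8]>> =
+    ///     caps.iter().map(|om| om.map(|m| m.as_bytes())).collect();
+    /// assert_eq!(texts, vec![Some(&b"42"[..]), Some(&b"42"[..]), None]);
+    /// ```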
+    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
+        SubCaptureMatches { caps: self, it: self.locs.iter() }
+    }
+
+    /// Expands all instances of `$name` in `replacement` to the corresponding
+    /// capture group `name`, and writes them to the `dst` buffer given.
+    ///
+    /// `name` may be an integer corresponding to the index of the
+    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// entire match) or it can be a name (consisting of letters, digits or
+    /// underscores) corresponding to a named capture group.
+    ///
+    /// If `name` isn't a valid capture group (whether the name doesn't exist
+    /// or isn't a valid index), then it is replaced with the empty string.
+    ///
+    /// The longest possible name is used; e.g., `$1a` looks up the capture
+    /// group named `1a` and not the capture group at index `1`. To exert more
+    /// precise control over the name, use braces, e.g., `${1}a`.
+    ///
+    /// To write a literal `$` use `$$`.
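+    ///
+    /// # Example
+    ///
+    /// A short sketch of expanding named groups into a byte buffer:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// let re = Regex::new(r"(?P<last>\w+), (?P<first>\w+)").unwrap();
+    /// let caps = re.captures(b"Kane, Citizen").unwrap();
+    /// let mut dst = Vec::new();
+    /// caps.expand(b"$first $last", &mut dst);
+    /// assert_eq!(dst, b"Citizen Kane".to_vec());
+    /// ```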
+    pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
+        expand_bytes(self, replacement, dst)
+    }
+
+    /// Returns the number of captured groups.
+    ///
+    /// This is always at least `1`, since every regex has at least one capture
+    /// group that corresponds to the full match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.locs.len()
+    }
+}
+
+impl<'t> fmt::Debug for Captures<'t> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
+    }
+}
+
+struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
+
+impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fn escape_bytes(bytes: &[u8]) -> String {
+            let mut s = String::new();
+            for &b in bytes {
+                s.push_str(&escape_byte(b));
+            }
+            s
+        }
+
+        fn escape_byte(byte: u8) -> String {
+            use std::ascii::escape_default;
+
+            let escaped: Vec<u8> = escape_default(byte).collect();
+            String::from_utf8_lossy(&escaped).into_owned()
+        }
+
+        // We'd like to show something nice here, even if it means an
+        // allocation to build a reverse index.
+        let slot_to_name: HashMap<&usize, &String> =
+            self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
+        let mut map = f.debug_map();
+        for (slot, m) in self.0.locs.iter().enumerate() {
+            let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e]));
+            if let Some(name) = slot_to_name.get(&slot) {
+                map.entry(&name, &m);
+            } else {
+                map.entry(&slot, &m);
+            }
+        }
+        map.finish()
+    }
+}
+
+/// Get a group by index.
+///
+/// `'t` is the lifetime of the matched text.
+///
+/// The text can't outlive the `Captures` object if this method is
+/// used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it); to do that, use `get()` instead.
+///
+/// # Panics
+///
+/// If there is no group at the given index.
+impl<'t> Index<usize> for Captures<'t> {
+    type Output = [u8];
+
+    fn index(&self, i: usize) -> &[u8] {
+        self.get(i)
+            .map(|m| m.as_bytes())
+            .unwrap_or_else(|| panic!("no group at index '{}'", i))
+    }
+}
+
+/// Get a group by name.
+///
+/// `'t` is the lifetime of the matched text and `'i` is the lifetime
+/// of the group name (the index).
+///
+/// The text can't outlive the `Captures` object if this method is
+/// used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it); to do that, use `name` instead.
+///
+/// # Panics
+///
+/// If there is no group named by the given value.
+impl<'t, 'i> Index<&'i str> for Captures<'t> {
+    type Output = [u8];
+
+    fn index<'a>(&'a self, name: &'i str) -> &'a [u8] {
+        self.name(name)
+            .map(|m| m.as_bytes())
+            .unwrap_or_else(|| panic!("no group named '{}'", name))
+    }
+}
+
+/// An iterator that yields all capturing matches in the order in which they
+/// appear in the regex.
+///
+/// If a particular capture group didn't participate in the match, then `None`
+/// is yielded for that capture. The first match always corresponds to the
+/// overall match of the regex.
+///
+/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
+/// the lifetime `'t` corresponds to the originally matched text.
+pub struct SubCaptureMatches<'c, 't: 'c> {
+    caps: &'c Captures<'t>,
+    it: SubCapturesPosIter<'c>,
+}
+
+impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
+    type Item = Option<Match<'t>>;
+
+    fn next(&mut self) -> Option<Option<Match<'t>>> {
+        self.it
+            .next()
+            .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
+    }
+}
+
+/// Replacer describes types that can be used to replace matches in a byte
+/// string.
+///
+/// In general, users of this crate shouldn't need to implement this trait,
+/// since implementations are already provided for `&[u8]` and
+/// `FnMut(&Captures) -> Vec<u8>` (or any `FnMut(&Captures) -> T`
+/// where `T: AsRef<[u8]>`), which covers most use cases.
+pub trait Replacer {
+    /// Appends text to `dst` to replace the current match.
+    ///
+    /// The current match is represented by `caps`, which is guaranteed to
+    /// have a match at capture group `0`.
+    ///
+    /// For example, a no-op replacement would be
+    /// `dst.extend(&caps[0])`.
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);
+
+    /// Return a fixed unchanging replacement byte string.
+    ///
+    /// When doing replacements, if access to `Captures` is not needed (e.g.,
+    /// the replacement byte string does not need `$` expansion), then it can
+    /// be beneficial to avoid finding sub-captures.
+    ///
+    /// In general, this is called once for every call to `replacen`.
+    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
+        None
+    }
+
+    /// Return a `Replacer` that borrows and wraps this `Replacer`.
+    ///
+    /// This is useful when you want to take a generic `Replacer` (which might
+    /// not be cloneable) and use it without consuming it, so it can be used
+    /// more than once.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::{Regex, Replacer};
+    ///
+    /// fn replace_all_twice<R: Replacer>(
+    ///     re: Regex,
+    ///     src: &[u8],
+    ///     mut rep: R,
+    /// ) -> Vec<u8> {
+    ///     let dst = re.replace_all(src, rep.by_ref());
+    ///     let dst = re.replace_all(&dst, rep.by_ref());
+    ///     dst.into_owned()
+    /// }
+    /// ```
+    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
+        ReplacerRef(self)
+    }
+}
+
+/// By-reference adaptor for a `Replacer`
+///
+/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
+#[derive(Debug)]
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
+
+impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+        self.0.replace_append(caps, dst)
+    }
+    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
+        self.0.no_expansion()
+    }
+}
+
+impl<'a> Replacer for &'a [u8] {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+        caps.expand(*self, dst);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
+        match find_byte(b'$', *self) {
+            Some(_) => None,
+            None => Some(Cow::Borrowed(*self)),
+        }
+    }
+}
+
+impl<F, T> Replacer for F
+where
+    F: FnMut(&Captures) -> T,
+    T: AsRef<[u8]>,
+{
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+        dst.extend_from_slice((*self)(caps).as_ref());
+    }
+}
+
+/// `NoExpand` indicates literal byte string replacement.
+///
+/// It can be used with `replace` and `replace_all` to do a literal byte string
+/// replacement without expanding `$name` to their corresponding capture
+/// groups. This can be both convenient (to avoid escaping `$`, for example)
+/// and performant (since capture groups don't need to be found).
+///
+/// `'t` is the lifetime of the literal text.
+pub struct NoExpand<'t>(pub &'t [u8]);
+
+impl<'t> Replacer for NoExpand<'t> {
+    fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
+        dst.extend_from_slice(self.0);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
+        Some(Cow::Borrowed(self.0))
+    }
+}
diff --git a/src/re_set.rs b/src/re_set.rs
new file mode 100644
index 0000000..fc2b61a
--- /dev/null
+++ b/src/re_set.rs
@@ -0,0 +1,452 @@
+macro_rules! define_set {
+    ($name:ident, $builder_mod:ident, $text_ty:ty, $as_bytes:expr,
+     $(#[$doc_regexset_example:meta])* ) => {
+        pub mod $name {
+            use std::fmt;
+            use std::iter;
+            use std::slice;
+            use std::vec;
+
+            use error::Error;
+            use exec::Exec;
+            use re_builder::$builder_mod::RegexSetBuilder;
+            use re_trait::RegularExpression;
+
+/// Match multiple (possibly overlapping) regular expressions in a single scan.
+///
+/// A regex set corresponds to the union of two or more regular expressions.
+/// That is, a regex set will match text where at least one of its
+/// constituent regular expressions matches. A regex set as it's formulated here
+/// provides a touch more power: it will also report *which* regular
+/// expressions in the set match. Indeed, this is the key difference between
+/// regex sets and a single `Regex` with many alternates, since only one
+/// alternate can match at a time.
+///
+/// For example, consider regular expressions to match email addresses and
+/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
+/// regex set is constructed from those regexes, then searching the text
+/// `foo@example.com` will report both regexes as matching. Of course, one
+/// could accomplish this by compiling each regex on its own and doing two
+/// searches over the text. The key advantage of using a regex set is that it
+/// will report the matching regexes using a *single pass through the text*.
+/// If one has hundreds or thousands of regexes to match repeatedly (like a URL
+/// router for a complex web application or a user agent matcher), then a regex
+/// set can realize huge performance gains.
+///
+/// # Example
+///
+/// This shows how the above two regexes (for matching email addresses and
+/// domains) might work:
+///
+$(#[$doc_regexset_example])*
+///
+/// Note that it would be possible to adapt the above example to using `Regex`
+/// with an expression like:
+///
+/// ```ignore
+/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
+/// ```
+///
+/// After a match, one could then inspect the capture groups to figure out
+/// which alternates matched. The problem is that it is hard to make this
+/// approach scale when there are many regexes since the overlap between each
+/// alternate isn't always obvious to reason about.
+///
+/// # Limitations
+///
+/// Regex sets are limited to answering the following two questions:
+///
+/// 1. Does any regex in the set match?
+/// 2. If so, which regexes in the set match?
+///
+/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2)
+/// since the matching engines can stop after the first match is found.
+///
+/// Other features like finding the location of successive matches or their
+/// sub-captures aren't supported. If you need this functionality, the
+/// recommended approach is to compile each regex in the set independently and
+/// selectively match them based on which regexes in the set matched.
+///
+/// # Performance
+///
+/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
+/// search takes `O(mn)` time, where `m` is proportional to the size of the
+/// regex set and `n` is proportional to the length of the search text.
+#[derive(Clone)]
+pub struct RegexSet(Exec);
+
+impl RegexSet {
+    /// Create a new regex set with the given regular expressions.
+    ///
+    /// This takes an iterator of `S`, where `S` is something that can produce
+    /// a `&str`. If any of the strings in the iterator are not valid regular
+    /// expressions, then an error is returned.
+    ///
+    /// # Example
+    ///
+    /// Create a new regex set from an iterator of strings:
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// ```
+    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
+            where S: AsRef<str>, I: IntoIterator<Item=S> {
+        RegexSetBuilder::new(exprs).build()
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches
+    /// the text given.
+    ///
+    /// This method should be preferred if you only need to test whether any
+    /// of the regexes in the set match, but don't care about *which*
+    /// regexes matched. This is because the underlying matching engine will
+    /// quit immediately after seeing the first match instead of continuing to
+    /// find all matches.
+    ///
+    /// Note that as with searches using `Regex`, the expression is unanchored
+    /// by default. That is, if the regex does not start with `^` or `\A`, or
+    /// end with `$` or `\z`, then it is permitted to match anywhere in the
+    /// text.
+    ///
+    /// # Example
+    ///
+    /// Tests whether a set matches some text:
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// assert!(!set.is_match("☃"));
+    /// ```
+    pub fn is_match(&self, text: $text_ty) -> bool {
+        self.is_match_at(text, 0)
+    }
+
+    /// Returns the same as is_match, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    #[doc(hidden)]
+    pub fn is_match_at(&self, text: $text_ty, start: usize) -> bool {
+        self.0.searcher().is_match_at($as_bytes(text), start)
+    }
+
+    /// Returns the set of regular expressions that match in the given text.
+    ///
+    /// The set returned contains the index of each regular expression that
+    /// matches in the given text. The index is in correspondence with the
+    /// order of regular expressions given to `RegexSet`'s constructor.
+    ///
+    /// The set can also be used to iterate over the matched indices.
+    ///
+    /// Note that as with searches using `Regex`, the expression is unanchored
+    /// by default. That is, if the regex does not start with `^` or `\A`, or
+    /// end with `$` or `\z`, then it is permitted to match anywhere in the
+    /// text.
+    ///
+    /// # Example
+    ///
+    /// Tests which regular expressions match the given text:
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
+    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+    ///
+    /// // You can also test whether a particular regex matched:
+    /// let matches = set.matches("foobar");
+    /// assert!(!matches.matched(5));
+    /// assert!(matches.matched(6));
+    /// ```
+    pub fn matches(&self, text: $text_ty) -> SetMatches {
+        let mut matches = vec![false; self.0.regex_strings().len()];
+        let any = self.read_matches_at(&mut matches, text, 0);
+        SetMatches {
+            matched_any: any,
+            matches: matches,
+        }
+    }
+
+    /// Returns the same as matches, but starts the search at the given
+    /// offset and stores the matches into the slice given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// `matches` must have a length that is at least the number of regexes
+    /// in this set.
+    ///
+    /// This method returns true if and only if at least one member of
+    /// `matches` is true after executing the set against `text`.
+    #[doc(hidden)]
+    pub fn read_matches_at(
+        &self,
+        matches: &mut [bool],
+        text: $text_ty,
+        start: usize,
+    ) -> bool {
+        self.0.searcher().many_matches_at(matches, $as_bytes(text), start)
+    }
+
+    /// Returns the total number of regular expressions in this set.
+    pub fn len(&self) -> usize {
+        self.0.regex_strings().len()
+    }
+
+    /// Returns the patterns that this set will match on.
+    ///
+    /// This function can be used to determine the pattern for a match. The
+    /// slice returned has exactly as many patterns as were given to this regex
+    /// set, and the order of the slice is the same as the order of the patterns
+    /// provided to the set.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set
+    ///     .matches("foobar")
+    ///     .into_iter()
+    ///     .map(|match_idx| &set.patterns()[match_idx])
+    ///     .collect();
+    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
+    /// ```
+    pub fn patterns(&self) -> &[String] {
+        self.0.regex_strings()
+    }
+}
+
+/// A set of matches returned by a regex set.
+#[derive(Clone, Debug)]
+pub struct SetMatches {
+    matched_any: bool,
+    matches: Vec<bool>,
+}
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    pub fn matched_any(&self) -> bool {
+        self.matched_any
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `regex_index` is greater than or equal to `self.len()`.
+    pub fn matched(&self, regex_index: usize) -> bool {
+        self.matches[regex_index]
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    pub fn len(&self) -> usize {
+        self.matches.len()
+    }
+
+    /// Returns an iterator over indexes in the regex that matched.
+    ///
+    /// This will always produce matches in ascending order of index, where
+    /// the index corresponds to the index of the regex that matched with
+    /// respect to its position when initially building the set.
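+    ///
+    /// # Example
+    ///
+    /// A brief sketch of collecting the matched indices:
+    ///
+    /// ```rust
+    /// # use regex::RegexSet;
+    /// let set = RegexSet::new(&[r"\d+", r"[a-z]+"]).unwrap();
+    /// let matched: Vec<usize> = set.matches("abc123").iter().collect();
+    /// assert_eq!(matched, vec![0, 1]);
+    /// ```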
+    pub fn iter(&self) -> SetMatchesIter {
+        SetMatchesIter((&*self.matches).into_iter().enumerate())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        SetMatchesIntoIter(self.matches.into_iter().enumerate())
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next() {
+                None => return None,
+                Some((_, false)) => {}
+                Some((i, true)) => return Some(i),
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for SetMatchesIntoIter {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next_back() {
+                None => return None,
+                Some((_, false)) => {}
+                Some((i, true)) => return Some(i),
+            }
+        }
+    }
+}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+#[derive(Clone)]
+pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);
+
+impl<'a> Iterator for SetMatchesIter<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next() {
+                None => return None,
+                Some((_, &false)) => {}
+                Some((i, &true)) => return Some(i),
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next_back() {
+                None => return None,
+                Some((_, &false)) => {}
+                Some((i, &true)) => return Some(i),
+            }
+        }
+    }
+}
+
+#[doc(hidden)]
+impl From<Exec> for RegexSet {
+    fn from(exec: Exec) -> Self {
+        RegexSet(exec)
+    }
+}
+
+impl fmt::Debug for RegexSet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "RegexSet({:?})", self.0.regex_strings())
+    }
+}
+
+#[allow(dead_code)] fn as_bytes_str(text: &str) -> &[u8] { text.as_bytes() }
+#[allow(dead_code)] fn as_bytes_bytes(text: &[u8]) -> &[u8] { text }
+        }
+    }
+}
+
+define_set! {
+    unicode,
+    set_unicode,
+    &str,
+    as_bytes_str,
+/// ```rust
+/// # use regex::RegexSet;
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match("foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with text that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with text that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches("example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+}
+
+define_set! {
+    bytes,
+    set_bytes,
+    &[u8],
+    as_bytes_bytes,
+/// ```rust
+/// # use regex::bytes::RegexSet;
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match(b"foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with text that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with text that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+}
diff --git a/src/re_trait.rs b/src/re_trait.rs
new file mode 100644
index 0000000..b56804e
--- /dev/null
+++ b/src/re_trait.rs
@@ -0,0 +1,261 @@
+/// Slot is a single saved capture location. Note that there are two slots for
+/// every capture in a regular expression (one slot each for the start and end
+/// of the capture).
+pub type Slot = Option<usize>;
+
+/// Locations represents the offsets of each capturing group in a regex for
+/// a single match.
+///
+/// Unlike `Captures`, a `Locations` value only stores offsets.
+#[doc(hidden)]
+#[derive(Clone, Debug)]
+pub struct Locations(Vec<Slot>);
+
+impl Locations {
+    /// Returns the start and end positions of the Nth capture group. Returns
+    /// `None` if `i` is not a valid capture group or if the capture group did
+    /// not match anything. The positions returned are *always* byte indices
+    /// with respect to the original string matched.
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        let (s, e) = (i * 2, i * 2 + 1);
+        match (self.0.get(s), self.0.get(e)) {
+            (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
+            _ => None,
+        }
+    }
+
+    /// Creates an iterator of all the capture group positions in order of
+    /// appearance in the regular expression. Positions are byte indices
+    /// in terms of the original string matched.
+    pub fn iter(&self) -> SubCapturesPosIter {
+        SubCapturesPosIter { idx: 0, locs: self }
+    }
+
+    /// Returns the total number of capturing groups.
+    ///
+    /// This is always at least `1` since every regex has at least `1`
+    /// capturing group that corresponds to the entire match.
+    pub fn len(&self) -> usize {
+        self.0.len() / 2
+    }
+
+    /// Return the individual slots as a slice.
+    pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
+        &mut self.0
+    }
+}
+
+/// An iterator over capture group positions for a particular match of a
+/// regular expression.
+///
+/// Positions are byte indices in terms of the original string matched.
+///
+/// `'c` is the lifetime of the captures.
+pub struct SubCapturesPosIter<'c> {
+    idx: usize,
+    locs: &'c Locations,
+}
+
+impl<'c> Iterator for SubCapturesPosIter<'c> {
+    type Item = Option<(usize, usize)>;
+
+    fn next(&mut self) -> Option<Option<(usize, usize)>> {
+        if self.idx >= self.locs.len() {
+            return None;
+        }
+        let x = match self.locs.pos(self.idx) {
+            None => Some(None),
+            Some((s, e)) => Some(Some((s, e))),
+        };
+        self.idx += 1;
+        x
+    }
+}
+
+/// `RegularExpression` describes types that can implement regex searching.
+///
+/// This trait is my attempt at reducing code duplication and standardizing
+/// the internal API. The specific duplication that is avoided is in the
+/// `find` and `capture` iterators, which are slightly tricky.
+///
+/// It's not clear whether this trait is worth it, and it also isn't
+/// clear whether it's useful as a public trait or not. Methods like
+/// `next_after_empty` reek of bad design, but the rest of the methods seem
+/// somewhat reasonable. One particular thing this trait would expose would be
+/// the ability to start the search of a regex anywhere in a haystack, which
+/// isn't possible in the current public API.
+pub trait RegularExpression: Sized {
+    /// The type of the haystack.
+    type Text: ?Sized;
+
+    /// The number of capture slots in the compiled regular expression. This is
+    /// always two times the number of capture groups (two slots per group).
+    fn slots_len(&self) -> usize;
+
+    /// Allocates fresh space for all capturing groups in this regex.
+    fn locations(&self) -> Locations {
+        Locations(vec![None; self.slots_len()])
+    }
+
+    /// Returns the position of the next character after `i`.
+    ///
+    /// For example, a haystack with type `&[u8]` probably returns `i+1`,
+    /// whereas a haystack with type `&str` probably returns `i` plus the
+    /// length of the next UTF-8 sequence.
+    fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
+
+    /// Returns the location of the shortest match.
+    fn shortest_match_at(
+        &self,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<usize>;
+
+    /// Returns whether the regex matches the text given.
+    fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
+
+    /// Returns the leftmost-first match location if one exists.
+    fn find_at(
+        &self,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<(usize, usize)>;
+
+    /// Returns the leftmost-first match location if one exists, and also
+    /// fills in any matching capture slot locations.
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<(usize, usize)>;
+
+    /// Returns an iterator over all non-overlapping successive leftmost-first
+    /// matches.
+    fn find_iter(self, text: &Self::Text) -> Matches<Self> {
+        Matches { re: self, text: text, last_end: 0, last_match: None }
+    }
+
+    /// Returns an iterator over all non-overlapping successive leftmost-first
+    /// matches with captures.
+    fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
+        CaptureMatches(self.find_iter(text))
+    }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches.
+pub struct Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    re: R,
+    text: &'t R::Text,
+    last_end: usize,
+    last_match: Option<usize>,
+}
+
+impl<'t, R> Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    /// Return the text being searched.
+    pub fn text(&self) -> &'t R::Text {
+        self.text
+    }
+
+    /// Return the underlying regex.
+    pub fn regex(&self) -> &R {
+        &self.re
+    }
+}
+
+impl<'t, R> Iterator for Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't + AsRef<[u8]>,
+{
+    type Item = (usize, usize);
+
+    fn next(&mut self) -> Option<(usize, usize)> {
+        if self.last_end > self.text.as_ref().len() {
+            return None;
+        }
+        let (s, e) = match self.re.find_at(self.text, self.last_end) {
+            None => return None,
+            Some((s, e)) => (s, e),
+        };
+        if s == e {
+            // This is an empty match. To ensure we make progress, start
+            // the next search at the smallest possible starting position
+            // of the next match following this one.
+            self.last_end = self.re.next_after_empty(self.text, e);
+            // Don't accept empty matches immediately following a match.
+            // Just move on to the next match.
+            if Some(e) == self.last_match {
+                return self.next();
+            }
+        } else {
+            self.last_end = e;
+        }
+        self.last_match = Some(e);
+        Some((s, e))
+    }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches with
+/// captures.
+pub struct CaptureMatches<'t, R>(Matches<'t, R>)
+where
+    R: RegularExpression,
+    R::Text: 't;
+
+impl<'t, R> CaptureMatches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    /// Return the text being searched.
+    pub fn text(&self) -> &'t R::Text {
+        self.0.text()
+    }
+
+    /// Return the underlying regex.
+    pub fn regex(&self) -> &R {
+        self.0.regex()
+    }
+}
+
+impl<'t, R> Iterator for CaptureMatches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't + AsRef<[u8]>,
+{
+    type Item = Locations;
+
+    fn next(&mut self) -> Option<Locations> {
+        if self.0.last_end > self.0.text.as_ref().len() {
+            return None;
+        }
+        let mut locs = self.0.re.locations();
+        let (s, e) = match self.0.re.captures_read_at(
+            &mut locs,
+            self.0.text,
+            self.0.last_end,
+        ) {
+            None => return None,
+            Some((s, e)) => (s, e),
+        };
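+        // Handle empty matches the same way as `Matches::next` above:
+        // advance past the empty match so we make progress, and skip an
+        // empty match that immediately follows the previous match.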
+        if s == e {
+            self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
+            if Some(e) == self.0.last_match {
+                return self.next();
+            }
+        } else {
+            self.0.last_end = e;
+        }
+        self.0.last_match = Some(e);
+        Some(locs)
+    }
+}
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
new file mode 100644
index 0000000..b746599
--- /dev/null
+++ b/src/re_unicode.rs
@@ -0,0 +1,1221 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::fmt;
+use std::ops::{Index, Range};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use find_byte::find_byte;
+use syntax;
+
+use error::Error;
+use exec::{Exec, ExecNoSyncStr};
+use expand::expand_str;
+use re_builder::unicode::RegexBuilder;
+use re_trait::{self, RegularExpression, SubCapturesPosIter};
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
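+///
+/// A short example (illustrative; the input string is arbitrary):
+///
+/// ```rust
+/// assert_eq!(regex::escape("a.b*c"), r"a\.b\*c");
+/// ```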
+pub fn escape(text: &str) -> String {
+    syntax::escape(text)
+}
+
+/// Match represents a single match of a regex in a haystack.
+///
+/// The lifetime parameter `'t` refers to the lifetime of the matched text.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct Match<'t> {
+    text: &'t str,
+    start: usize,
+    end: usize,
+}
+
+impl<'t> Match<'t> {
+    /// Returns the starting byte offset of the match in the haystack.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// Returns the ending byte offset of the match in the haystack.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.end
+    }
+
+    /// Returns the range over the starting and ending byte offsets of the
+    /// match in the haystack.
+    #[inline]
+    pub fn range(&self) -> Range<usize> {
+        self.start..self.end
+    }
+
+    /// Returns the matched text.
+    #[inline]
+    pub fn as_str(&self) -> &'t str {
+        &self.text[self.range()]
+    }
+
+    /// Creates a new match from the given haystack and byte offsets.
+    #[inline]
+    fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> {
+        Match { text: haystack, start: start, end: end }
+    }
+}
+
+impl<'t> From<Match<'t>> for &'t str {
+    fn from(m: Match<'t>) -> &'t str {
+        m.as_str()
+    }
+}
+
+impl<'t> From<Match<'t>> for Range<usize> {
+    fn from(m: Match<'t>) -> Range<usize> {
+        m.range()
+    }
+}
+
+/// A compiled regular expression for matching Unicode strings.
+///
+/// It is represented as either a sequence of bytecode instructions (dynamic)
+/// or as a specialized Rust function (native). It can be used to search, split
+/// or replace text. All searching is done with an implicit `.*?` at the
+/// beginning and end of an expression. To force an expression to match the
+/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
+/// `$` (or `\A` and `\z`).
+///
+/// While this crate will handle Unicode strings (whether in the regular
+/// expression or in the search text), all positions returned are **byte
+/// indices**. Every byte index is guaranteed to be at a Unicode code point
+/// boundary.
+///
+/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
+/// compiled regular expression and text to search, respectively.
+///
+/// The only methods that allocate new strings are the string replacement
+/// methods. All other methods (searching and splitting) return borrowed
+/// pointers into the string given.
+///
+/// # Examples
+///
+/// Find the location of a US phone number:
+///
+/// ```rust
+/// # use regex::Regex;
+/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
+/// let mat = re.find("phone: 111-222-3333").unwrap();
+/// assert_eq!((mat.start(), mat.end()), (7, 19));
+/// ```
+///
+/// # Using the `std::str::pattern` methods with `Regex`
+///
+/// > **Note**: This section requires that this crate is compiled with the
+/// > `pattern` Cargo feature enabled, which **requires nightly Rust**.
+///
+/// Since `Regex` implements `Pattern`, you can use regexes with methods
+/// defined on `&str`. For example, `is_match`, `find`, `find_iter`
+/// and `split` can be replaced with `str::contains`, `str::find`,
+/// `str::match_indices` and `str::split`.
+///
+/// Here are some examples:
+///
+/// ```rust,ignore
+/// # use regex::Regex;
+/// let re = Regex::new(r"\d+").unwrap();
+/// let haystack = "a111b222c";
+///
+/// assert!(haystack.contains(&re));
+/// assert_eq!(haystack.find(&re), Some(1));
+/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
+///            vec![(1, 4), (5, 8)]);
+/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
+/// ```
+#[derive(Clone)]
+pub struct Regex(Exec);
+
+impl fmt::Display for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl fmt::Debug for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(self, f)
+    }
+}
+
+#[doc(hidden)]
+impl From<Exec> for Regex {
+    fn from(exec: Exec) -> Regex {
+        Regex(exec)
+    }
+}
+
+impl FromStr for Regex {
+    type Err = Error;
+
+    /// Attempts to parse a string into a regular expression.
+    fn from_str(s: &str) -> Result<Regex, Error> {
+        Regex::new(s)
+    }
+}
+
+/// Core regular expression methods.
+impl Regex {
+    /// Compiles a regular expression. Once compiled, it can be used repeatedly
+    /// to search, split or replace text in a string.
+    ///
+    /// If an invalid expression is given, then an error is returned.
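+    ///
+    /// For example (an illustrative sketch with arbitrary patterns):
+    ///
+    /// ```rust
+    /// use regex::Regex;
+    ///
+    /// assert!(Regex::new(r"\d{4}-\d{2}").is_ok());
+    /// // An unclosed group is a syntax error.
+    /// assert!(Regex::new(r"(unclosed").is_err());
+    /// ```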
+    pub fn new(re: &str) -> Result<Regex, Error> {
+        RegexBuilder::new(re).build()
+    }
+
+    /// Returns true if and only if the regex matches the string given.
+    ///
+    /// It is recommended to use this method if all you need to do is test
+    /// a match, since the underlying matching engine may be able to do less
+    /// work.
+    ///
+    /// # Example
+    ///
+    /// Test if some text contains at least one word with exactly 13
+    /// Unicode word characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "I categorically deny having triskaidekaphobia.";
+    /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
+    /// # }
+    /// ```
+    pub fn is_match(&self, text: &str) -> bool {
+        self.is_match_at(text, 0)
+    }
+
+    /// Returns the start and end byte range of the leftmost-first match in
+    /// `text`. If no match exists, then `None` is returned.
+    ///
+    /// Note that this should only be used if you want to discover the position
+    /// of the match. Testing the existence of a match is faster if you use
+    /// `is_match`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of the first word with exactly 13
+    /// Unicode word characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "I categorically deny having triskaidekaphobia.";
+    /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
+    /// assert_eq!(mat.start(), 2);
+    /// assert_eq!(mat.end(), 15);
+    /// # }
+    /// ```
+    pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
+        self.find_at(text, 0)
+    }
+
+    /// Returns an iterator for each successive non-overlapping match in
+    /// `text`, returning the start and end byte indices with respect to
+    /// `text`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of every word with exactly 13 Unicode
+    /// word characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "Retroactively relinquishing remunerations is reprehensible.";
+    /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
+    ///     println!("{:?}", mat);
+    /// }
+    /// # }
+    /// ```
+    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
+        Matches(self.0.searcher_str().find_iter(text))
+    }
+
+    /// Returns the capture groups corresponding to the leftmost-first
+    /// match in `text`. Capture group `0` always corresponds to the entire
+    /// match. If no match is found, then `None` is returned.
+    ///
+    /// You should only use `captures` if you need access to the location of
+    /// capturing group matches. Otherwise, `find` is faster for discovering
+    /// the location of the overall match.
+    ///
+    /// # Examples
+    ///
+    /// Say you have some text with movie names and their release years,
+    /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
+    /// looking like that, while also extracting the movie name and its release
+    /// year separately.
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
+    /// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane");
+    /// assert_eq!(caps.get(2).unwrap().as_str(), "1941");
+    /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
+    /// // You can also access the groups by index using the Index notation.
+    /// // Note that this will panic on an invalid index.
+    /// assert_eq!(&caps[1], "Citizen Kane");
+    /// assert_eq!(&caps[2], "1941");
+    /// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
+    /// # }
+    /// ```
+    ///
+    /// Note that the full match is at capture group `0`. Each subsequent
+    /// capture group is indexed by the order of its opening `(`.
+    ///
+    /// We can make this example a bit clearer by using *named* capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane");
+    /// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
+    /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
+    /// // You can also access the groups by name using the Index notation.
+    /// // Note that this will panic on an invalid group name.
+    /// assert_eq!(&caps["title"], "Citizen Kane");
+    /// assert_eq!(&caps["year"], "1941");
+    /// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
+    ///
+    /// # }
+    /// ```
+    ///
+    /// Here we name the capture groups, which we can access with the `name`
+    /// method or the `Index` notation with a `&str`. Note that the named
+    /// capture groups are still accessible with `get` or the `Index` notation
+    /// with a `usize`.
+    ///
+    /// The `0`th capture group is always unnamed, so it must always be
+    /// accessed with `get(0)` or `[0]`.
+    pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
+        let mut locs = self.capture_locations();
+        self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
+            text: text,
+            locs: locs.0,
+            named_groups: self.0.capture_name_idx().clone(),
+        })
+    }
+
+    /// Returns an iterator over all the non-overlapping capture groups matched
+    /// in `text`. This is operationally the same as `find_iter`, except it
+    /// yields information about capturing group matches.
+    ///
+    /// # Example
+    ///
+    /// We can use this to find all movie titles and their release years in
+    /// some text, where the movie is formatted like "'Title' (xxxx)":
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
+    /// for caps in re.captures_iter(text) {
+    ///     println!("Movie: {:?}, Released: {:?}",
+    ///              &caps["title"], &caps["year"]);
+    /// }
+    /// // Output:
+    /// // Movie: Citizen Kane, Released: 1941
+    /// // Movie: The Wizard of Oz, Released: 1939
+    /// // Movie: M, Released: 1931
+    /// # }
+    /// ```
+    pub fn captures_iter<'r, 't>(
+        &'r self,
+        text: &'t str,
+    ) -> CaptureMatches<'r, 't> {
+        CaptureMatches(self.0.searcher_str().captures_iter(text))
+    }
+
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Namely, each element of the iterator corresponds to
+    /// text that *isn't* matched by the regular expression.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"[ \t]+").unwrap();
+    /// let fields: Vec<&str> = re.split("a b \t  c\td    e").collect();
+    /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
+    /// # }
+    /// ```
+    pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> {
+        Split { finder: self.find_iter(text), last: 0 }
+    }
+
+    /// Returns an iterator of at most `limit` substrings of `text` delimited
+    /// by a match of the regular expression. (A `limit` of `0` will return no
+    /// substrings.) Namely, each element of the iterator corresponds to text
+    /// that *isn't* matched by the regular expression. The remainder of the
+    /// string that is not split will be the last element in the iterator.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// Get the first two words in some text:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\W+").unwrap();
+    /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
+    /// assert_eq!(fields, vec!("Hey", "How", "are you?"));
+    /// # }
+    /// ```
+    pub fn splitn<'r, 't>(
+        &'r self,
+        text: &'t str,
+        limit: usize,
+    ) -> SplitN<'r, 't> {
+        SplitN { splits: self.split(text), n: limit }
+    }
+
+    /// Replaces the leftmost-first match with the replacement provided.
+    /// The replacement can be a regular string (where `$N` and `$name` are
+    /// expanded to match capture groups) or a function that takes the matches'
+    /// `Captures` and returns the replaced string.
+    ///
+    /// If no match is found, then a copy of the string is returned unchanged.
+    ///
+    /// # Replacement string syntax
+    ///
+    /// All instances of `$name` in the replacement text are replaced with the
+    /// corresponding capture group `name`.
+    ///
+    /// `name` may be an integer corresponding to the index of the
+    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// entire match) or it can be a name (consisting of letters, digits or
+    /// underscores) corresponding to a named capture group.
+    ///
+    /// If `name` isn't a valid capture group (whether the name doesn't exist
+    /// or isn't a valid index), then it is replaced with the empty string.
+    ///
+    /// The longest possible name is used; e.g., `$1a` looks up the capture
+    /// group named `1a` and not the capture group at index `1`. To exert more
+    /// precise control over the name, use braces, e.g., `${1}a`.
+    ///
+    /// To write a literal `$` use `$$`.
+    ///
+    /// # Examples
+    ///
+    /// Note that this function is polymorphic with respect to the replacement.
+    /// In typical usage, this can just be a normal string:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new("[^01]+").unwrap();
+    /// assert_eq!(re.replace("1078910", ""), "1010");
+    /// # }
+    /// ```
+    ///
+    /// But anything satisfying the `Replacer` trait will work. For example,
+    /// a closure of type `|&Captures| -> String` provides direct access to the
+    /// captures corresponding to a match. This allows one to access
+    /// capturing group matches easily:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # use regex::Captures; fn main() {
+    /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
+    ///     format!("{} {}", &caps[2], &caps[1])
+    /// });
+    /// assert_eq!(result, "Bruce Springsteen");
+    /// # }
+    /// ```
+    ///
+    /// But this is a bit cumbersome to use all the time. Instead, a simple
+    /// syntax is supported that expands `$name` into the corresponding capture
+    /// group. Here's the last example, but using this expansion technique
+    /// with named capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", "$first $last");
+    /// assert_eq!(result, "Bruce Springsteen");
+    /// # }
+    /// ```
+    ///
+    /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
+    /// would produce the same result. To write a literal `$` use `$$`.
+    ///
+    /// Sometimes the replacement string requires use of curly braces to
+    /// delineate a capture group replacement and surrounding literal text.
+    /// For example, if we wanted to join two words together with an
+    /// underscore:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
+    /// let result = re.replace("deep fried", "${first}_$second");
+    /// assert_eq!(result, "deep_fried");
+    /// # }
+    /// ```
+    ///
+    /// Without the curly braces, the capture group name `first_` would be
+    /// used, and since it doesn't exist, it would be replaced with the empty
+    /// string.
+    ///
+    /// Finally, sometimes you just want to replace a literal string with no
+    /// regard for capturing group expansion. This can be done by wrapping a
+    /// string with `NoExpand`:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// use regex::NoExpand;
+    ///
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
+    /// assert_eq!(result, "$2 $last");
+    /// # }
+    /// ```
+    pub fn replace<'t, R: Replacer>(
+        &self,
+        text: &'t str,
+        rep: R,
+    ) -> Cow<'t, str> {
+        self.replacen(text, 1, rep)
+    }
+
+    /// Replaces all non-overlapping matches in `text` with the replacement
+    /// provided. This is the same as calling `replacen` with `limit` set to
+    /// `0`.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// capturing group matches in the replacement string.
+    pub fn replace_all<'t, R: Replacer>(
+        &self,
+        text: &'t str,
+        rep: R,
+    ) -> Cow<'t, str> {
+        self.replacen(text, 0, rep)
+    }
+
+    /// Replaces at most `limit` non-overlapping matches in `text` with the
+    /// replacement provided. If `limit` is 0, then all non-overlapping matches
+    /// are replaced.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// capturing group matches in the replacement string.
+    pub fn replacen<'t, R: Replacer>(
+        &self,
+        text: &'t str,
+        limit: usize,
+        mut rep: R,
+    ) -> Cow<'t, str> {
+        // If we know that the replacement doesn't have any capture expansions,
+        // then we can fast path. The fast path can make a tremendous
+        // difference:
+        //
+        //   1) We use `find_iter` instead of `captures_iter`. Not asking for
+        //      captures generally makes the regex engines faster.
+        //   2) We don't need to look up all of the capture groups and do
+        //      replacements inside the replacement string. We just push it
+        //      at each match and be done with it.
+        if let Some(rep) = rep.no_expansion() {
+            let mut it = self.find_iter(text).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(text);
+            }
+            let mut new = String::with_capacity(text.len());
+            let mut last_match = 0;
+            for (i, m) in it {
+                if limit > 0 && i >= limit {
+                    break;
+                }
+                new.push_str(&text[last_match..m.start()]);
+                new.push_str(&rep);
+                last_match = m.end();
+            }
+            new.push_str(&text[last_match..]);
+            return Cow::Owned(new);
+        }
+
+        // The slower path, which we use if the replacement needs access to
+        // capture groups.
+        let mut it = self.captures_iter(text).enumerate().peekable();
+        if it.peek().is_none() {
+            return Cow::Borrowed(text);
+        }
+        let mut new = String::with_capacity(text.len());
+        let mut last_match = 0;
+        for (i, cap) in it {
+            if limit > 0 && i >= limit {
+                break;
+            }
+            // unwrap on 0 is OK because captures only reports matches
+            let m = cap.get(0).unwrap();
+            new.push_str(&text[last_match..m.start()]);
+            rep.replace_append(&cap, &mut new);
+            last_match = m.end();
+        }
+        new.push_str(&text[last_match..]);
+        Cow::Owned(new)
+    }
+}
+
+/// Advanced or "lower level" search methods.
+impl Regex {
+    /// Returns the end location of a match in the text given.
+    ///
+    /// This method may have the same performance characteristics as
+    /// `is_match`, except it provides an end location for a match. In
+    /// particular, the location returned *may be shorter* than the proper end
+    /// of the leftmost-first match.
+    ///
+    /// # Example
+    ///
+    /// Typically, `a+` would match the entire first sequence of `a` in some
+    /// text, but `shortest_match` can give up as soon as it sees the first
+    /// `a`.
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "aaaaa";
+    /// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
+    /// assert_eq!(pos, Some(1));
+    /// # }
+    /// ```
+    pub fn shortest_match(&self, text: &str) -> Option<usize> {
+        self.shortest_match_at(text, 0)
+    }
+
+    /// Returns the same as shortest_match, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
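+    ///
+    /// A small illustration of that point (the pattern and haystack are
+    /// arbitrary):
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"\Aa").unwrap();
+    /// // Starting at offset 1 keeps the surrounding context, so `\A` cannot
+    /// // match even though an `a` begins at that offset.
+    /// assert_eq!(re.shortest_match_at("ba", 1), None);
+    /// // Slicing the haystack instead discards that context.
+    /// assert_eq!(re.shortest_match(&"ba"[1..]), Some(1));
+    /// ```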
+    pub fn shortest_match_at(
+        &self,
+        text: &str,
+        start: usize,
+    ) -> Option<usize> {
+        self.0.searcher_str().shortest_match_at(text, start)
+    }
+
+    /// Returns the same as is_match, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn is_match_at(&self, text: &str, start: usize) -> bool {
+        self.shortest_match_at(text, start).is_some()
+    }
+
+    /// Returns the same as find, but starts the search at the given
+    /// offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn find_at<'t>(
+        &self,
+        text: &'t str,
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.0
+            .searcher_str()
+            .find_at(text, start)
+            .map(|(s, e)| Match::new(text, s, e))
+    }
+
+    /// This is like `captures`, but uses
+    /// [`CaptureLocations`](struct.CaptureLocations.html)
+    /// instead of
+    /// [`Captures`](struct.Captures.html) in order to amortize allocations.
+    ///
+    /// To create a `CaptureLocations` value, use the
+    /// `Regex::capture_locations` method.
+    ///
+    /// This returns the overall match if the search was successful, which is
+    /// always equivalent to the `0`th capture group.
+    pub fn captures_read<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t str,
+    ) -> Option<Match<'t>> {
+        self.captures_read_at(locs, text, 0)
+    }
+
+    /// Returns the same as captures, but starts the search at the given
+    /// offset and populates the capture locations given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn captures_read_at<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t str,
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.0
+            .searcher_str()
+            .captures_read_at(&mut locs.0, text, start)
+            .map(|(s, e)| Match::new(text, s, e))
+    }
+
+    /// An undocumented alias for `captures_read_at`.
+    ///
+    /// The `regex-capi` crate previously used this routine, so to avoid
+    /// breaking that crate, we continue to provide the name as an undocumented
+    /// alias.
+    #[doc(hidden)]
+    pub fn read_captures_at<'t>(
+        &self,
+        locs: &mut CaptureLocations,
+        text: &'t str,
+        start: usize,
+    ) -> Option<Match<'t>> {
+        self.captures_read_at(locs, text, start)
+    }
+}
+
+/// Auxiliary methods.
+impl Regex {
+    /// Returns the original string of this regex.
+    pub fn as_str(&self) -> &str {
+        &self.0.regex_strings()[0]
+    }
+
+    /// Returns an iterator over the capture names.
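+    ///
+    /// A brief example (illustrative; the pattern is arbitrary):
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"(\d{4})-(?P<month>\d{2})").unwrap();
+    /// let names: Vec<_> = re.capture_names().collect();
+    /// // Capture group 0 and unnamed groups yield `None`.
+    /// assert_eq!(names, vec![None, None, Some("month")]);
+    /// ```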
+    pub fn capture_names(&self) -> CaptureNames {
+        CaptureNames(self.0.capture_names().iter())
+    }
+
+    /// Returns the number of capture groups in this regex, including the
+    /// implicit capture group `0` that corresponds to the entire match.
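+    ///
+    /// For example (illustrative):
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"(\d{4})-(?P<month>\d{2})").unwrap();
+    /// assert_eq!(re.captures_len(), 3);
+    /// ```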
+    pub fn captures_len(&self) -> usize {
+        self.0.capture_names().len()
+    }
+
+    /// Returns an empty set of capture locations that can be reused in
+    /// multiple calls to `captures_read` or `captures_read_at`.
+    pub fn capture_locations(&self) -> CaptureLocations {
+        CaptureLocations(self.0.searcher_str().locations())
+    }
+
+    /// An alias for `capture_locations` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate uses this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    pub fn locations(&self) -> CaptureLocations {
+        CaptureLocations(self.0.searcher_str().locations())
+    }
+}
+
+/// An iterator over the names of all possible captures.
+///
+/// `None` indicates an unnamed capture; the first element (capture 0, the
+/// whole matched region) is always unnamed.
+///
+/// `'r` is the lifetime of the compiled regular expression.
+pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
+
+impl<'r> Iterator for CaptureNames<'r> {
+    type Item = Option<&'r str>;
+
+    fn next(&mut self) -> Option<Option<&'r str>> {
+        self.0
+            .next()
+            .as_ref()
+            .map(|slot| slot.as_ref().map(|name| name.as_ref()))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+/// Yields all substrings delimited by a regular expression match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the string being split.
+pub struct Split<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for Split<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last > text.len() {
+                    None
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len() + 1; // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.start()];
+                self.last = m.end();
+                Some(matched)
+            }
+        }
+    }
+}
+
+/// Yields at most `N` substrings delimited by a regular expression match.
+///
+/// The last substring will be whatever remains after splitting.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the string being split.
+pub struct SplitN<'r, 't> {
+    splits: Split<'r, 't>,
+    n: usize,
+}
+
+impl<'r, 't> Iterator for SplitN<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        if self.n == 0 {
+            return None;
+        }
+
+        self.n -= 1;
+        if self.n > 0 {
+            return self.splits.next();
+        }
+
+        let text = self.splits.finder.0.text();
+        if self.splits.last > text.len() {
+            // We've already returned all substrings.
+            None
+        } else {
+            // self.n == 0, so future calls will return None immediately
+            Some(&text[self.splits.last..])
+        }
+    }
+}
+
+/// CaptureLocations is a low level representation of the raw offsets of each
+/// submatch.
+///
+/// You can think of this as a lower level
+/// [`Captures`](struct.Captures.html), where this type does not support
+/// named capturing groups directly and it does not borrow the text that these
+/// offsets were matched on.
+///
+/// Primarily, this type is useful when using the lower level `Regex` APIs
+/// such as `captures_read` and `captures_read_at`, which permit amortizing
+/// the allocation in which capture match locations are stored.
+///
+/// In order to build a value of this type, you'll need to call the
+/// `capture_locations` method on the `Regex` being used to execute the search.
+/// The value returned can then be reused in subsequent searches.
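+///
+/// A minimal usage sketch (the pattern and haystack are arbitrary):
+///
+/// ```rust
+/// # use regex::Regex;
+/// let re = Regex::new(r"(\w)(\d)").unwrap();
+/// // Allocate once, then reuse across searches.
+/// let mut locs = re.capture_locations();
+/// assert!(re.captures_read(&mut locs, "a1").is_some());
+/// assert_eq!(locs.get(0), Some((0, 2)));
+/// assert_eq!(locs.get(1), Some((0, 1)));
+/// assert_eq!(locs.get(2), Some((1, 2)));
+/// ```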
+#[derive(Clone, Debug)]
+pub struct CaptureLocations(re_trait::Locations);
+
+/// A type alias for `CaptureLocations` for backwards compatibility.
+///
+/// Previously, we exported `CaptureLocations` as `Locations` in an
+/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
+/// we continue re-exporting the same undocumented API.
+#[doc(hidden)]
+pub type Locations = CaptureLocations;
+
+impl CaptureLocations {
+    /// Returns the start and end positions of the Nth capture group. Returns
+    /// `None` if `i` is not a valid capture group or if the capture group did
+    /// not match anything. The positions returned are *always* byte indices
+    /// with respect to the original string matched.
+    #[inline]
+    pub fn get(&self, i: usize) -> Option<(usize, usize)> {
+        self.0.pos(i)
+    }
+
+    /// Returns the total number of capturing groups.
+    ///
+    /// This is always at least `1` since every regex has at least `1`
+    /// capturing group that corresponds to the entire match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// An alias for the `get` method for backwards compatibility.
+    ///
+    /// Previously, we exported `get` as `pos` in an undocumented API. To
+    /// prevent breaking that code (e.g., in `regex-capi`), we continue
+    /// re-exporting the same undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        self.get(i)
+    }
+}
+
+/// Captures represents a group of captured strings for a single match.
+///
+/// The 0th capture always corresponds to the entire match. Each subsequent
+/// index corresponds to the next capture group in the regex. If a capture
+/// group is named, then the matched string is *also* available via the `name`
+/// method. (Note that the 0th capture is always unnamed and so must be
+/// accessed with the `get` method.)
+///
+/// Positions returned from a capture group are always byte indices.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct Captures<'t> {
+    text: &'t str,
+    locs: re_trait::Locations,
+    named_groups: Arc<HashMap<String, usize>>,
+}
+
+impl<'t> Captures<'t> {
+    /// Returns the match associated with the capture group at index `i`. If
+    /// `i` does not correspond to a capture group, or if the capture group
+    /// did not participate in the match, then `None` is returned.
+    ///
+    /// # Examples
+    ///
+    /// Get the text of the match with a default of an empty string if this
+    /// group didn't participate in the match:
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
+    /// let caps = re.captures("abc123").unwrap();
+    ///
+    /// let text1 = caps.get(1).map_or("", |m| m.as_str());
+    /// let text2 = caps.get(2).map_or("", |m| m.as_str());
+    /// assert_eq!(text1, "123");
+    /// assert_eq!(text2, "");
+    /// ```
+    pub fn get(&self, i: usize) -> Option<Match<'t>> {
+        self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e))
+    }
+
+    /// Returns the match for the capture group named `name`. If `name` isn't a
+    /// valid capture group or didn't match anything, then `None` is returned.
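+    ///
+    /// For example (illustrative):
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"(?P<year>\d{4})").unwrap();
+    /// let caps = re.captures("1941").unwrap();
+    /// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
+    /// assert!(caps.name("month").is_none());
+    /// ```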
+    pub fn name(&self, name: &str) -> Option<Match<'t>> {
+        self.named_groups.get(name).and_then(|&i| self.get(i))
+    }
+
+    /// An iterator that yields all capturing matches in the order in which
+    /// they appear in the regex. If a particular capture group didn't
+    /// participate in the match, then `None` is yielded for that capture.
+    ///
+    /// The first match always corresponds to the overall match of the regex.
+    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
+        SubCaptureMatches { caps: self, it: self.locs.iter() }
+    }
+
+    /// Expands all instances of `$name` in `replacement` to the corresponding
+    /// capture group `name`, and writes them to the `dst` buffer given.
+    ///
+    /// `name` may be an integer corresponding to the index of the
+    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// entire match) or it can be a name (consisting of letters, digits or
+    /// underscores) corresponding to a named capture group.
+    ///
+    /// If `name` isn't a valid capture group (whether the name doesn't exist
+    /// or isn't a valid index), then it is replaced with the empty string.
+    ///
+    /// The longest possible name is used; e.g., `$1a` looks up the capture
+    /// group named `1a` and not the capture group at index `1`. To exert more
+    /// precise control over the name, use braces, e.g., `${1}a`.
+    ///
+    /// To write a literal `$` use `$$`.
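+    ///
+    /// A short example of the expansion rules above (the pattern and
+    /// replacement are arbitrary):
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
+    /// let caps = re.captures("deep fried").unwrap();
+    /// let mut dst = String::new();
+    /// caps.expand("${second}_$first", &mut dst);
+    /// assert_eq!(dst, "fried_deep");
+    /// ```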
+    pub fn expand(&self, replacement: &str, dst: &mut String) {
+        expand_str(self, replacement, dst)
+    }
+
+    /// Returns the number of captured groups.
+    ///
+    /// This is always at least `1`, since every regex has at least one capture
+    /// group that corresponds to the full match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.locs.len()
+    }
+}
+
+impl<'t> fmt::Debug for Captures<'t> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
+    }
+}
+
+struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
+
+impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // We'd like to show something nice here, even if it means an
+        // allocation to build a reverse index.
+        let slot_to_name: HashMap<&usize, &String> =
+            self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
+        let mut map = f.debug_map();
+        for (slot, m) in self.0.locs.iter().enumerate() {
+            let m = m.map(|(s, e)| &self.0.text[s..e]);
+            if let Some(name) = slot_to_name.get(&slot) {
+                map.entry(&name, &m);
+            } else {
+                map.entry(&slot, &m);
+            }
+        }
+        map.finish()
+    }
+}
+
+/// Get a group by index.
+///
+/// `'t` is the lifetime of the matched text.
+///
+/// The text can't outlive the `Captures` object if this method is
+/// used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it); to do that, use `get()` instead.
+///
+/// # Panics
+///
+/// If there is no group at the given index.
+impl<'t> Index<usize> for Captures<'t> {
+    type Output = str;
+
+    fn index(&self, i: usize) -> &str {
+        self.get(i)
+            .map(|m| m.as_str())
+            .unwrap_or_else(|| panic!("no group at index '{}'", i))
+    }
+}
+
+/// Get a group by name.
+///
+/// `'t` is the lifetime of the matched text and `'i` is the lifetime
+/// of the group name (the index).
+///
+/// The text can't outlive the `Captures` object if this method is
+/// used, because of how `Index` is defined (normally `a[i]` is part
+/// of `a` and can't outlive it); to do that, use `name` instead.
+///
+/// # Panics
+///
+/// If there is no group named by the given value.
+impl<'t, 'i> Index<&'i str> for Captures<'t> {
+    type Output = str;
+
+    fn index<'a>(&'a self, name: &'i str) -> &'a str {
+        self.name(name)
+            .map(|m| m.as_str())
+            .unwrap_or_else(|| panic!("no group named '{}'", name))
+    }
+}
+
+/// An iterator that yields all capturing matches in the order in which they
+/// appear in the regex.
+///
+/// If a particular capture group didn't participate in the match, then `None`
+/// is yielded for that capture. The first match always corresponds to the
+/// overall match of the regex.
+///
+/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
+/// the lifetime `'t` corresponds to the originally matched text.
+pub struct SubCaptureMatches<'c, 't: 'c> {
+    caps: &'c Captures<'t>,
+    it: SubCapturesPosIter<'c>,
+}
+
+impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
+    type Item = Option<Match<'t>>;
+
+    fn next(&mut self) -> Option<Option<Match<'t>>> {
+        self.it
+            .next()
+            .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
+    }
+}
+
+/// An iterator that yields all non-overlapping capture groups matching a
+/// particular regular expression.
+///
+/// The iterator stops when no more matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the matched string.
+pub struct CaptureMatches<'r, 't>(
+    re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>,
+);
+
+impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
+    type Item = Captures<'t>;
+
+    fn next(&mut self) -> Option<Captures<'t>> {
+        self.0.next().map(|locs| Captures {
+            text: self.0.text(),
+            locs: locs,
+            named_groups: self.0.regex().capture_name_idx().clone(),
+        })
+    }
+}
+
+/// An iterator over all non-overlapping matches for a particular string.
+///
+/// The iterator yields a `Match` value. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the matched string.
+pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>);
+
+impl<'r, 't> Iterator for Matches<'r, 't> {
+    type Item = Match<'t>;
+
+    fn next(&mut self) -> Option<Match<'t>> {
+        let text = self.0.text();
+        self.0.next().map(|(s, e)| Match::new(text, s, e))
+    }
+}
+
+/// Replacer describes types that can be used to replace matches in a string.
+///
+/// In general, users of this crate shouldn't need to implement this trait,
+/// since implementations are already provided for `&str` and
+/// `FnMut(&Captures) -> String` (or any `FnMut(&Captures) -> T`
+/// where `T: AsRef<str>`), which covers most use cases.
+pub trait Replacer {
+    /// Appends text to `dst` to replace the current match.
+    ///
+    /// The current match is represented by `caps`, which is guaranteed to
+    /// have a match at capture group `0`.
+    ///
+    /// For example, a no-op replacement would be
+    /// `dst.push_str(caps.get(0).unwrap().as_str())`.
+    fn replace_append(&mut self, caps: &Captures, dst: &mut String);
+
+    /// Return a fixed unchanging replacement string.
+    ///
+    /// When doing replacements, if access to `Captures` is not needed (e.g.,
+    /// the replacement string does not need `$` expansion), then it can
+    /// be beneficial to avoid finding sub-captures.
+    ///
+    /// In general, this is called once for every call to `replacen`.
+    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
+        None
+    }
+
+    /// Return a `Replacer` that borrows and wraps this `Replacer`.
+    ///
+    /// This is useful when you want to take a generic `Replacer` (which might
+    /// not be cloneable) and use it without consuming it, so it can be used
+    /// more than once.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::{Regex, Replacer};
+    ///
+    /// fn replace_all_twice<R: Replacer>(
+    ///     re: Regex,
+    ///     src: &str,
+    ///     mut rep: R,
+    /// ) -> String {
+    ///     let dst = re.replace_all(src, rep.by_ref());
+    ///     let dst = re.replace_all(&dst, rep.by_ref());
+    ///     dst.into_owned()
+    /// }
+    /// ```
+    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
+        ReplacerRef(self)
+    }
+}
+
+/// By-reference adaptor for a `Replacer`.
+///
+/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
+#[derive(Debug)]
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
+
+impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
+        self.0.replace_append(caps, dst)
+    }
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
+        self.0.no_expansion()
+    }
+}
+
+impl<'a> Replacer for &'a str {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
+        caps.expand(*self, dst);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
+        match find_byte(b'$', self.as_bytes()) {
+            Some(_) => None,
+            None => Some(Cow::Borrowed(*self)),
+        }
+    }
+}
+
+impl<F, T> Replacer for F
+where
+    F: FnMut(&Captures) -> T,
+    T: AsRef<str>,
+{
+    fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
+        dst.push_str((*self)(caps).as_ref());
+    }
+}
+
+/// `NoExpand` indicates literal string replacement.
+///
+/// It can be used with `replace` and `replace_all` to do a literal string
+/// replacement without expanding `$name` to its corresponding capture
+/// group. This can be both convenient (to avoid escaping `$`, for example)
+/// and performant (since capture groups don't need to be found).
+///
+/// `'t` is the lifetime of the literal text.
+pub struct NoExpand<'t>(pub &'t str);
+
+impl<'t> Replacer for NoExpand<'t> {
+    fn replace_append(&mut self, _: &Captures, dst: &mut String) {
+        dst.push_str(self.0);
+    }
+
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
+        Some(Cow::Borrowed(self.0))
+    }
+}
diff --git a/src/sparse.rs b/src/sparse.rs
new file mode 100644
index 0000000..bc1b2b5
--- /dev/null
+++ b/src/sparse.rs
@@ -0,0 +1,84 @@
+use std::fmt;
+use std::ops::Deref;
+use std::slice;
+
+/// A sparse set used for representing ordered NFA states.
+///
+/// This supports constant time addition and membership testing. Clearing an
+/// entire set can also be done in constant time. Iteration yields elements
+/// in the order in which they were inserted.
+///
+/// The data structure is based on: http://research.swtch.com/sparse
+/// Note though that we don't actually use uninitialized memory. We generally
+/// reuse allocations, so the initial allocation cost is bearable. However,
+/// its other properties listed above are extremely useful.
+#[derive(Clone)]
+pub struct SparseSet {
+    /// Dense contains the instruction pointers in the order in which they
+    /// were inserted.
+    dense: Vec<usize>,
+    /// Sparse maps instruction pointers to their location in dense.
+    ///
+    /// An instruction pointer is in the set if and only if
+    /// sparse[ip] < dense.len() && ip == dense[sparse[ip]].
+    sparse: Box<[usize]>,
+}
+
+impl SparseSet {
+    pub fn new(size: usize) -> SparseSet {
+        SparseSet {
+            dense: Vec::with_capacity(size),
+            sparse: vec![0; size].into_boxed_slice(),
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        self.dense.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.dense.is_empty()
+    }
+
+    pub fn capacity(&self) -> usize {
+        self.dense.capacity()
+    }
+
+    pub fn insert(&mut self, value: usize) {
+        let i = self.len();
+        assert!(i < self.capacity());
+        self.dense.push(value);
+        self.sparse[value] = i;
+    }
+
+    pub fn contains(&self, value: usize) -> bool {
+        let i = self.sparse[value];
+        self.dense.get(i) == Some(&value)
+    }
+
+    pub fn clear(&mut self) {
+        self.dense.clear();
+    }
+}
+
+impl fmt::Debug for SparseSet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "SparseSet({:?})", self.dense)
+    }
+}
+
+impl Deref for SparseSet {
+    type Target = [usize];
+
+    fn deref(&self) -> &Self::Target {
+        &self.dense
+    }
+}
+
+impl<'a> IntoIterator for &'a SparseSet {
+    type Item = &'a usize;
+    type IntoIter = slice::Iter<'a, usize>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
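+
+// An illustrative check of the properties described above (membership,
+// insertion-order iteration and constant-time clearing). This test module is
+// an editorial sketch and is not part of the upstream crate.
+#[cfg(test)]
+mod illustrative_tests {
+    use super::SparseSet;
+
+    #[test]
+    fn insert_contains_iterate_clear() {
+        let mut set = SparseSet::new(10);
+        set.insert(7);
+        set.insert(2);
+        assert!(set.contains(7) && set.contains(2));
+        assert!(!set.contains(3));
+        // Iteration yields elements in the order they were inserted.
+        assert_eq!(set.iter().cloned().collect::<Vec<_>>(), vec![7, 2]);
+        set.clear();
+        assert!(set.is_empty());
+    }
+}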
diff --git a/src/testdata/LICENSE b/src/testdata/LICENSE
new file mode 100644
index 0000000..f47dbf4
--- /dev/null
+++ b/src/testdata/LICENSE
@@ -0,0 +1,19 @@
+The following license covers testregex.c and all associated test data.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of the
+Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following disclaimer:
+
+THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/testdata/README b/src/testdata/README
new file mode 100644
index 0000000..6efc2da
--- /dev/null
+++ b/src/testdata/README
@@ -0,0 +1,17 @@
+Test data was taken from the Go distribution, which was in turn taken from the
+testregex test suite:
+
+  http://www2.research.att.com/~astopen/testregex/testregex.html
+
+The LICENSE in this directory corresponds to the LICENSE that the data was
+released under.
+
+The tests themselves were modified for RE2/Go. A couple were modified further
+by me, Andrew Gallant (only in repetition.dat), so that RE2/Go would pass them.
+(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
+have been a bad idea, but I think being consistent with an established regex
+library is worth something.
+
+Note that these files are read by 'scripts/regex-match-tests.py' and turned
+into Rust tests found in 'regex_macros/tests/matches.rs'.
+
diff --git a/src/testdata/basic.dat b/src/testdata/basic.dat
new file mode 100644
index 0000000..632e1bb
--- /dev/null
+++ b/src/testdata/basic.dat
@@ -0,0 +1,221 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(?-u)(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+# No collation in Go
+#BE	[[-]]			[[-]]		(2,4)
+#BE	[[.NIL.]]	NULL	ECOLLATE
+#BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+BE$	.*			\x01\x7f	(0,2)
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+#E	(a*)*			-		(0,0)(0,0)
+E	(a*)*			-		(0,0)(?,?)	RE2/Go
+E	(a*)+			-		(0,0)(0,0)
+#E	(a*|b)*			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(?,?)	RE2/Go
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+#E	(^)*			-		(0,0)(0,0)
+E	(^)*			-		(0,0)(?,?)	RE2/Go
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+#E	((a*|b))*		-		(0,0)(0,0)(0,0)
+E	((a*|b))*		-		(0,0)(?,?)(?,?)	RE2/Go
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,3)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
diff --git a/src/testdata/nullsubexpr.dat b/src/testdata/nullsubexpr.dat
new file mode 100644
index 0000000..2e18fbb
--- /dev/null
+++ b/src/testdata/nullsubexpr.dat
@@ -0,0 +1,79 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+#E	SAME		b		(0,0)(0,0)
+E	SAME		b		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+#E	SAME		aaaaaa		(0,0)(0,0)
+E	SAME		aaaaaa		(0,0)(?,?)	RE2/Go
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+#E	SAME		ababab		(0,0)(0,0)
+E	SAME		ababab		(0,0)(?,?)	RE2/Go
+
+E	((z)+|a)*	zabcde		(0,2)(1,2)
+
+#{E	a+?		aaaaaa		(0,1)	no *? +? minimal match ops
+#E	(a)		aaa		(0,1)(0,1)
+#E	(a*?)		aaa		(0,0)(0,0)
+#E	(a)*?		aaa		(0,0)
+#E	(a*?)*?		aaa		(0,0)
+#}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+#E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		x	(0,1)(?,?)(0,1)	RE2/Go
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
diff --git a/src/testdata/repetition.dat b/src/testdata/repetition.dat
new file mode 100644
index 0000000..3bb2121
--- /dev/null
+++ b/src/testdata/repetition.dat
@@ -0,0 +1,163 @@
+NOTE	implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+#E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+#E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.))*				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(0,2)(3,4)	RE2/Go
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+#E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+NOTE	additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtree. 
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E	X(.?){0,}Y	X1234567Y	(0,9)(7,8)
+:HA#101:E	X(.?){1,}Y	X1234567Y	(0,9)(7,8)
+:HA#102:E	X(.?){2,}Y	X1234567Y	(0,9)(7,8)
+:HA#103:E	X(.?){3,}Y	X1234567Y	(0,9)(7,8)
+:HA#104:E	X(.?){4,}Y	X1234567Y	(0,9)(7,8)
+:HA#105:E	X(.?){5,}Y	X1234567Y	(0,9)(7,8)
+:HA#106:E	X(.?){6,}Y	X1234567Y	(0,9)(7,8)
+:HA#107:E	X(.?){7,}Y	X1234567Y	(0,9)(7,8)
+:HA#108:E	X(.?){8,}Y	X1234567Y	(0,9)(8,8)
+#:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(7,8)
+:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(7,8)
+:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(7,8)
+:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(7,8)
+:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(7,8)
+:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(7,8)
+:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(7,8)
+:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(7,8)
+:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+:HA#118:E	X(.?){8,8}Y	X1234567Y	(0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation).  The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#262:E	(a|ab|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#263:E	(a|ab|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#264:E	(a|ab|c|bcd){4,}(d*)	ababcd	NOMATCH
+:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#267:E	(a|ab|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#268:E	(a|ab|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#269:E	(a|ab|c|bcd){4,10}(d*)	ababcd	NOMATCH
+:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,1)(0,1)(1,1)
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#284:E	(ab|a|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#289:E	(ab|a|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
diff --git a/src/utf8.rs b/src/utf8.rs
new file mode 100644
index 0000000..6e0608f
--- /dev/null
+++ b/src/utf8.rs
@@ -0,0 +1,264 @@
+//! A few elementary UTF-8 encoding and decoding functions used by the
+//! matching engines.
+//!
+//! In an ideal world, the matching engines would operate on `&str` and we
+//! could just lean on the standard library for all our UTF-8 needs. However,
+//! to support byte-based regexes (which can match arbitrary bytes that may
+//! contain UTF-8), we need to be able to search and decode UTF-8 on a
+//! `&[u8]`. The standard library doesn't really recognize this use case, so
+//! we have to build it out ourselves.
+//!
+//! Should this be factored out into a separate crate? It seems independently
+//! useful. There are other crates that already exist (e.g., `utf-8`) that
+//! have overlapping use cases. Not sure what to do.
+use std::char;
+
+const TAG_CONT: u8 = 0b1000_0000;
+const TAG_TWO: u8 = 0b1100_0000;
+const TAG_THREE: u8 = 0b1110_0000;
+const TAG_FOUR: u8 = 0b1111_0000;
+
+/// Returns the smallest possible index of the next valid UTF-8 sequence
+/// starting after `i`.
+pub fn next_utf8(text: &[u8], i: usize) -> usize {
+    let b = match text.get(i) {
+        None => return i + 1,
+        Some(&b) => b,
+    };
+    let inc = if b <= 0x7F {
+        1
+    } else if b <= 0b110_11111 {
+        2
+    } else if b <= 0b1110_1111 {
+        3
+    } else {
+        4
+    };
+    i + inc
+}
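+
+// A minimal usage sketch (illustrative only, not upstream documentation),
+// assuming the byte string "aδ", i.e. [0x61, 0xCE, 0xB4]:
+//
+//     let text = "aδ".as_bytes();
+//     assert_eq!(next_utf8(text, 0), 1); // past the ASCII 'a'
+//     assert_eq!(next_utf8(text, 1), 3); // past the 2-byte 'δ'
+//     assert_eq!(next_utf8(text, 3), 4); // out of bounds: just i + 1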
+
+/// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`.
+///
+/// If no valid UTF-8 sequence could be found, then `None` is returned.
+/// Otherwise, the decoded codepoint and the number of bytes read is returned.
+/// The number of bytes read (for a valid UTF-8 sequence) is guaranteed to be
+/// 1, 2, 3 or 4.
+///
+/// Note that a UTF-8 sequence is invalid if it is incorrect UTF-8, encodes a
+/// codepoint that is out of range (surrogate codepoints are out of range) or
+/// is not the shortest possible UTF-8 sequence for that codepoint.
+#[inline]
+pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
+    let b0 = match src.get(0) {
+        None => return None,
+        Some(&b) if b <= 0x7F => return Some((b as char, 1)),
+        Some(&b) => b,
+    };
+    match b0 {
+        0b110_00000..=0b110_11111 => {
+            if src.len() < 2 {
+                return None;
+            }
+            let b1 = src[1];
+            if 0b11_000000 & b1 != TAG_CONT {
+                return None;
+            }
+            let cp = ((b0 & !TAG_TWO) as u32) << 6 | ((b1 & !TAG_CONT) as u32);
+            match cp {
+                0x80..=0x7FF => char::from_u32(cp).map(|cp| (cp, 2)),
+                _ => None,
+            }
+        }
+        0b1110_0000..=0b1110_1111 => {
+            if src.len() < 3 {
+                return None;
+            }
+            let (b1, b2) = (src[1], src[2]);
+            if 0b11_000000 & b1 != TAG_CONT {
+                return None;
+            }
+            if 0b11_000000 & b2 != TAG_CONT {
+                return None;
+            }
+            let cp = ((b0 & !TAG_THREE) as u32) << 12
+                | ((b1 & !TAG_CONT) as u32) << 6
+                | ((b2 & !TAG_CONT) as u32);
+            match cp {
+                // char::from_u32 will disallow surrogate codepoints.
+                0x800..=0xFFFF => char::from_u32(cp).map(|cp| (cp, 3)),
+                _ => None,
+            }
+        }
+        0b11110_000..=0b11110_111 => {
+            if src.len() < 4 {
+                return None;
+            }
+            let (b1, b2, b3) = (src[1], src[2], src[3]);
+            if 0b11_000000 & b1 != TAG_CONT {
+                return None;
+            }
+            if 0b11_000000 & b2 != TAG_CONT {
+                return None;
+            }
+            if 0b11_000000 & b3 != TAG_CONT {
+                return None;
+            }
+            let cp = ((b0 & !TAG_FOUR) as u32) << 18
+                | ((b1 & !TAG_CONT) as u32) << 12
+                | ((b2 & !TAG_CONT) as u32) << 6
+                | ((b3 & !TAG_CONT) as u32);
+            match cp {
+                0x10000..=0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
+                _ => None,
+            }
+        }
+        _ => None,
+    }
+}
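+
+// A small sketch of the contract above (illustrative only): ASCII decodes in
+// one byte, a valid multi-byte sequence reports its length, and overlong or
+// truncated sequences are rejected.
+//
+//     assert_eq!(decode_utf8(b"a"), Some(('a', 1)));
+//     assert_eq!(decode_utf8(&[0xCE, 0xB4]), Some(('δ', 2))); // 'δ' = U+03B4
+//     assert_eq!(decode_utf8(&[0xC0, 0xAF]), None); // overlong '/'
+//     assert_eq!(decode_utf8(&[0xE2]), None); // truncated 3-byte sequence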
+
+/// Like `decode_utf8`, but decodes the last UTF-8 sequence in `src` instead
+/// of the first.
+pub fn decode_last_utf8(src: &[u8]) -> Option<(char, usize)> {
+    if src.is_empty() {
+        return None;
+    }
+    let mut start = src.len() - 1;
+    if src[start] <= 0x7F {
+        return Some((src[start] as char, 1));
+    }
+    while start > src.len().saturating_sub(4) {
+        start -= 1;
+        if is_start_byte(src[start]) {
+            break;
+        }
+    }
+    match decode_utf8(&src[start..]) {
+        None => None,
+        Some((_, n)) if n < src.len() - start => None,
+        Some((cp, n)) => Some((cp, n)),
+    }
+}
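+
+// The difference from `decode_utf8`, sketched on the same "aδ" buffer
+// (illustrative only): the last complete sequence is decoded, and a buffer
+// that ends in a partial sequence yields `None`.
+//
+//     let text = "aδ".as_bytes(); // [0x61, 0xCE, 0xB4]
+//     assert_eq!(decode_utf8(text), Some(('a', 1)));
+//     assert_eq!(decode_last_utf8(text), Some(('δ', 2)));
+//     assert_eq!(decode_last_utf8(&text[..2]), None); // truncated 'δ'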
+
+fn is_start_byte(b: u8) -> bool {
+    b & 0b11_000000 != TAG_CONT
+}
+
+#[cfg(test)]
+mod tests {
+    use std::str;
+
+    use quickcheck::quickcheck;
+
+    use super::{
+        decode_last_utf8, decode_utf8, TAG_CONT, TAG_FOUR, TAG_THREE, TAG_TWO,
+    };
+
+    #[test]
+    fn prop_roundtrip() {
+        fn p(given_cp: char) -> bool {
+            let mut tmp = [0; 4];
+            let encoded_len = given_cp.encode_utf8(&mut tmp).len();
+            let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap();
+            encoded_len == got_len && given_cp == got_cp
+        }
+        quickcheck(p as fn(char) -> bool)
+    }
+
+    #[test]
+    fn prop_roundtrip_last() {
+        fn p(given_cp: char) -> bool {
+            let mut tmp = [0; 4];
+            let encoded_len = given_cp.encode_utf8(&mut tmp).len();
+            let (got_cp, got_len) =
+                decode_last_utf8(&tmp[..encoded_len]).unwrap();
+            encoded_len == got_len && given_cp == got_cp
+        }
+        quickcheck(p as fn(char) -> bool)
+    }
+
+    #[test]
+    fn prop_encode_matches_std() {
+        fn p(cp: char) -> bool {
+            let mut got = [0; 4];
+            let n = cp.encode_utf8(&mut got).len();
+            let expected = cp.to_string();
+            &got[..n] == expected.as_bytes()
+        }
+        quickcheck(p as fn(char) -> bool)
+    }
+
+    #[test]
+    fn prop_decode_matches_std() {
+        fn p(given_cp: char) -> bool {
+            let mut tmp = [0; 4];
+            let n = given_cp.encode_utf8(&mut tmp).len();
+            let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap();
+            let expected_cp =
+                str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap();
+            got_cp == expected_cp
+        }
+        quickcheck(p as fn(char) -> bool)
+    }
+
+    #[test]
+    fn prop_decode_last_matches_std() {
+        fn p(given_cp: char) -> bool {
+            let mut tmp = [0; 4];
+            let n = given_cp.encode_utf8(&mut tmp).len();
+            let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap();
+            let expected_cp = str::from_utf8(&tmp[..n])
+                .unwrap()
+                .chars()
+                .rev()
+                .next()
+                .unwrap();
+            got_cp == expected_cp
+        }
+        quickcheck(p as fn(char) -> bool)
+    }
+
+    #[test]
+    fn reject_invalid() {
+        // Invalid start byte
+        assert_eq!(decode_utf8(&[0xFF]), None);
+        // Surrogate codepoint (U+D801)
+        assert_eq!(decode_utf8(&[0xED, 0xA0, 0x81]), None);
+        // Invalid continuation byte.
+        assert_eq!(decode_utf8(&[0xD4, 0xC2]), None);
+        // Bad lengths
+        assert_eq!(decode_utf8(&[0xC3]), None); // 2 bytes
+        assert_eq!(decode_utf8(&[0xEF, 0xBF]), None); // 3 bytes
+        assert_eq!(decode_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes
+        // Not a minimal UTF-8 sequence
+        assert_eq!(decode_utf8(&[TAG_TWO, TAG_CONT | b'a']), None);
+        assert_eq!(decode_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a']), None);
+        assert_eq!(
+            decode_utf8(&[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',]),
+            None
+        );
+    }
+
+    #[test]
+    fn reject_invalid_last() {
+        // Invalid start byte
+        assert_eq!(decode_last_utf8(&[0xFF]), None);
+        // Surrogate codepoint (U+D801)
+        assert_eq!(decode_last_utf8(&[0xED, 0xA0, 0x81]), None);
+        // Bad lengths
+        assert_eq!(decode_last_utf8(&[0xC3]), None); // 2 bytes
+        assert_eq!(decode_last_utf8(&[0xEF, 0xBF]), None); // 3 bytes
+        assert_eq!(decode_last_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes
+        // Not a minimal UTF-8 sequence
+        assert_eq!(decode_last_utf8(&[TAG_TWO, TAG_CONT | b'a']), None);
+        assert_eq!(
+            decode_last_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a',]),
+            None
+        );
+        assert_eq!(
+            decode_last_utf8(
+                &[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',]
+            ),
+            None
+        );
+    }
+}
diff --git a/test b/test
new file mode 100755
index 0000000..3d1351c
--- /dev/null
+++ b/test
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# This is a convenience script for running a broad swath of tests across
+# features. We don't test the complete space, since the complete space is quite
+# large. Hopefully once we migrate the test suite to better infrastructure
+# (like regex-automata), we'll be able to test more of the space.
+echo "===== DEFAULT FEATURES ==="
+cargo test
+
+echo "===== DOC TESTS ==="
+cargo test --doc
+
+features=(
+    "std"
+    "std unicode"
+    "std unicode-perl"
+    "std perf"
+    "std perf-cache"
+    "std perf-dfa"
+    "std perf-inline"
+    "std perf-literal"
+)
+for f in "${features[@]}"; do
+    echo "===== FEATURE: $f (default) ==="
+    cargo test --test default --no-default-features --features "$f"
+    echo "===== FEATURE: $f (default-bytes) ==="
+    cargo test --test default-bytes --no-default-features --features "$f"
+done
diff --git a/tests/api.rs b/tests/api.rs
new file mode 100644
index 0000000..0d4962c
--- /dev/null
+++ b/tests/api.rs
@@ -0,0 +1,222 @@
+#[test]
+fn empty_regex_empty_match() {
+    let re = regex!("");
+    assert_eq!(vec![(0, 0)], findall!(re, ""));
+}
+
+#[test]
+fn empty_regex_nonempty_match() {
+    let re = regex!("");
+    assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc"));
+}
+
+#[test]
+fn one_zero_length_match() {
+    let re = regex!(r"[0-9]*");
+    assert_eq!(vec![(0, 0), (1, 2), (3, 4)], findall!(re, "a1b2"));
+}
+
+#[test]
+fn many_zero_length_match() {
+    let re = regex!(r"[0-9]*");
+    assert_eq!(
+        vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)],
+        findall!(re, "a1bbb2")
+    );
+}
+
+#[test]
+fn many_sequential_zero_length_match() {
+    let re = regex!(r"[0-9]?");
+    assert_eq!(
+        vec![(0, 0), (1, 2), (2, 3), (4, 5), (6, 6)],
+        findall!(re, "a12b3c")
+    );
+}
+
+#[test]
+fn quoted_bracket_set() {
+    let re = regex!(r"([\x{5b}\x{5d}])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+    let re = regex!(r"([\[\]])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn first_range_starts_with_left_bracket() {
+    let re = regex!(r"([\[-z])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn range_ends_with_escape() {
+    let re = regex!(r"([\[-\x{5d}])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn empty_match_find_iter() {
+    let re = regex!(r".*?");
+    assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc"));
+}
+
+#[test]
+fn empty_match_captures_iter() {
+    let re = regex!(r".*?");
+    let ms: Vec<_> = re
+        .captures_iter(text!("abc"))
+        .map(|c| c.get(0).unwrap())
+        .map(|m| (m.start(), m.end()))
+        .collect();
+    assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
+}
+
+#[test]
+fn capture_names() {
+    let re = regex!(r"(.)(?P<a>.)");
+    assert_eq!(3, re.captures_len());
+    assert_eq!((3, Some(3)), re.capture_names().size_hint());
+    assert_eq!(
+        vec![None, None, Some("a")],
+        re.capture_names().collect::<Vec<_>>()
+    );
+}
+
+#[test]
+fn regex_string() {
+    assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str());
+    assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+")));
+    assert_eq!(r"[a-zA-Z0-9]+", &format!("{:?}", regex!(r"[a-zA-Z0-9]+")));
+}
+
+#[test]
+fn capture_index() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    assert_eq!(&cap[0], t!("abc"));
+    assert_eq!(&cap[1], t!("abc"));
+    assert_eq!(&cap["name"], t!("abc"));
+}
+
+#[test]
+#[should_panic]
+#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)]
+fn capture_index_panic_usize() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    let _ = cap[2];
+}
+
+#[test]
+#[should_panic]
+#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)]
+fn capture_index_panic_name() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    let _ = cap["bad name"];
+}
+
+#[test]
+fn capture_index_lifetime() {
+    // This is a test of whether the types on `caps["..."]` are general
+    // enough. If not, this will fail to typecheck.
+    fn inner(s: &str) -> usize {
+        let re = regex!(r"(?P<number>[0-9]+)");
+        let caps = re.captures(t!(s)).unwrap();
+        caps["number"].len()
+    }
+    assert_eq!(3, inner("123"));
+}
+
+#[test]
+fn capture_misc() {
+    let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
+    let cap = re.captures(t!("abc")).unwrap();
+
+    assert_eq!(5, cap.len());
+
+    assert_eq!((0, 3), {
+        let m = cap.get(0).unwrap();
+        (m.start(), m.end())
+    });
+    assert_eq!(None, cap.get(2));
+    assert_eq!((2, 3), {
+        let m = cap.get(4).unwrap();
+        (m.start(), m.end())
+    });
+
+    assert_eq!(t!("abc"), match_text!(cap.get(0).unwrap()));
+    assert_eq!(None, cap.get(2));
+    assert_eq!(t!("c"), match_text!(cap.get(4).unwrap()));
+
+    assert_eq!(None, cap.name("a"));
+    assert_eq!(t!("c"), match_text!(cap.name("b").unwrap()));
+}
+
+#[test]
+fn sub_capture_matches() {
+    let re = regex!(r"([a-z])(([a-z])|([0-9]))");
+    let cap = re.captures(t!("a5")).unwrap();
+    let subs: Vec<_> = cap.iter().collect();
+
+    assert_eq!(5, subs.len());
+    assert!(subs[0].is_some());
+    assert!(subs[1].is_some());
+    assert!(subs[2].is_some());
+    assert!(subs[3].is_none());
+    assert!(subs[4].is_some());
+
+    assert_eq!(t!("a5"), match_text!(subs[0].unwrap()));
+    assert_eq!(t!("a"), match_text!(subs[1].unwrap()));
+    assert_eq!(t!("5"), match_text!(subs[2].unwrap()));
+    assert_eq!(t!("5"), match_text!(subs[4].unwrap()));
+}
+
+expand!(expand1, r"(?-u)(?P<foo>\w+)", "abc", "$foo", "abc");
+expand!(expand2, r"(?-u)(?P<foo>\w+)", "abc", "$0", "abc");
+expand!(expand3, r"(?-u)(?P<foo>\w+)", "abc", "$1", "abc");
+expand!(expand4, r"(?-u)(?P<foo>\w+)", "abc", "$$1", "$1");
+expand!(expand5, r"(?-u)(?P<foo>\w+)", "abc", "$$foo", "$foo");
+expand!(expand6, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$b$a", "123abc");
+expand!(expand7, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "z$bz$az", "z");
+expand!(
+    expand8,
+    r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)",
+    "abc 123",
+    ".$b.$a.",
+    ".123.abc."
+);
+expand!(
+    expand9,
+    r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)",
+    "abc 123",
+    " $b $a ",
+    " 123 abc "
+);
+expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", "");
+
+split!(
+    split1,
+    r"(?-u)\s+",
+    "a b\nc\td\n\t e",
+    &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")]
+);
+split!(
+    split2,
+    r"(?-u)\b",
+    "a b c",
+    &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")]
+);
+split!(split3, r"a$", "a", &[t!(""), t!("")]);
+split!(split_none, r"-", r"a", &[t!("a")]);
+split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]);
+split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]);
+split!(split_empty, r"-", r"", &[t!("")]);
+
+splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]);
+splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]);
+splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]);
+splitn!(splitn_zero_limit, r"-", r"a-b", 0, empty_vec!());
+splitn!(splitn_trailing_blank, r"-", r"a-", 2, &[t!("a"), t!("")]);
+splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]);
+splitn!(splitn_empty, r"-", r"", 1, &[t!("")]);
diff --git a/tests/api_str.rs b/tests/api_str.rs
new file mode 100644
index 0000000..480116d
--- /dev/null
+++ b/tests/api_str.rs
@@ -0,0 +1,34 @@
+// These tests don't really make sense with the bytes API, so we only test them
+// on the Unicode API.
+
+#[test]
+fn empty_match_unicode_find_iter() {
+    // Tests that we still yield byte ranges at valid UTF-8 sequence boundaries
+    // even when we're susceptible to empty width matches.
+    let re = regex!(r".*?");
+    assert_eq!(
+        vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)],
+        findall!(re, "Ⅰ1Ⅱ2")
+    );
+}
+
+#[test]
+fn empty_match_unicode_captures_iter() {
+    // Same as empty_match_unicode_find_iter, but tests capture iteration.
+    let re = regex!(r".*?");
+    let ms: Vec<_> = re
+        .captures_iter(text!("Ⅰ1Ⅱ2"))
+        .map(|c| c.get(0).unwrap())
+        .map(|m| (m.start(), m.end()))
+        .collect();
+    assert_eq!(vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], ms);
+}
+
+#[test]
+fn match_as_str() {
+    let re = regex!(r"fo+");
+    let caps = re.captures("barfoobar").unwrap();
+    assert_eq!(caps.get(0).map(|m| m.as_str()), Some("foo"));
+    assert_eq!(caps.get(0).map(From::from), Some("foo"));
+    assert_eq!(caps.get(0).map(Into::into), Some("foo"));
+}
diff --git a/tests/bytes.rs b/tests/bytes.rs
new file mode 100644
index 0000000..d05f138
--- /dev/null
+++ b/tests/bytes.rs
@@ -0,0 +1,107 @@
+// These are tests specifically crafted for regexes that can match arbitrary
+// bytes.
+
+// A silly wrapper to make it possible to write and match raw bytes.
+struct R<'a>(&'a [u8]);
+impl<'a> R<'a> {
+    fn as_bytes(&self) -> &'a [u8] {
+        self.0
+    }
+}
+
+mat!(word_boundary, r"(?-u) \b", " δ", None);
+#[cfg(feature = "unicode-perl")]
+mat!(word_boundary_unicode, r" \b", " δ", Some((0, 1)));
+mat!(word_not_boundary, r"(?-u) \B", " δ", Some((0, 1)));
+#[cfg(feature = "unicode-perl")]
+mat!(word_not_boundary_unicode, r" \B", " δ", None);
+
+mat!(perl_w_ascii, r"(?-u)\w+", "aδ", Some((0, 1)));
+#[cfg(feature = "unicode-perl")]
+mat!(perl_w_unicode, r"\w+", "aδ", Some((0, 3)));
+mat!(perl_d_ascii, r"(?-u)\d+", "1२३9", Some((0, 1)));
+#[cfg(feature = "unicode-perl")]
+mat!(perl_d_unicode, r"\d+", "1२३9", Some((0, 8)));
+mat!(perl_s_ascii, r"(?-u)\s+", " \u{1680}", Some((0, 1)));
+#[cfg(feature = "unicode-perl")]
+mat!(perl_s_unicode, r"\s+", " \u{1680}", Some((0, 4)));
+
+// The first `(.+)` matches two Unicode codepoints, but can't match the 5th
+// byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
+// matches.
+mat!(
+    mixed1,
+    r"(.+)(?-u)(.+)",
+    R(b"\xCE\x93\xCE\x94\xFF"),
+    Some((0, 5)),
+    Some((0, 4)),
+    Some((4, 5))
+);
+
+mat!(case_ascii_one, r"(?i-u)a", "A", Some((0, 1)));
+mat!(case_ascii_class, r"(?i-u)[a-z]+", "AaAaA", Some((0, 5)));
+#[cfg(feature = "unicode-case")]
+mat!(case_unicode, r"(?i)[a-z]+", "aA\u{212A}aA", Some((0, 7)));
+mat!(case_not_unicode, r"(?i-u)[a-z]+", "aA\u{212A}aA", Some((0, 2)));
+
+mat!(negate_unicode, r"[^a]", "δ", Some((0, 2)));
+mat!(negate_not_unicode, r"(?-u)[^a]", "δ", Some((0, 1)));
+
+// This doesn't match in a normal Unicode regex because the implicit preceding
+// `.*?` is Unicode aware.
+mat!(dotstar_prefix_not_unicode1, r"(?-u)a", R(b"\xFFa"), Some((1, 2)));
+mat!(dotstar_prefix_not_unicode2, r"a", R(b"\xFFa"), Some((1, 2)));
+
+// Have fun with null bytes.
+mat!(
+    null_bytes,
+    r"(?-u)(?P<cstr>[^\x00]+)\x00",
+    R(b"foo\x00"),
+    Some((0, 4)),
+    Some((0, 3))
+);
+
+// Test that lookahead operators work properly in the face of invalid UTF-8.
+// See: https://github.com/rust-lang/regex/issues/277
+matiter!(
+    invalidutf8_anchor1,
+    r"(?-u)\xcc?^",
+    R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"),
+    (0, 0)
+);
+matiter!(
+    invalidutf8_anchor2,
+    r"(?-u)^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$",
+    R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"),
+    (22, 22)
+);
+matiter!(
+    invalidutf8_anchor3,
+    r"(?-u)^|ddp\xff\xffdddddlQd@\x80",
+    R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"),
+    (0, 0)
+);
+
+// See https://github.com/rust-lang/regex/issues/303
+#[test]
+fn negated_full_byte_range() {
+    assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err());
+}
+
+matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ");
+matiter!(
+    word_boundary_ascii2,
+    r"(?-u:\B)",
+    "0\u{7EF5E}",
+    (2, 2),
+    (3, 3),
+    (4, 4),
+    (5, 5)
+);
+
+// See: https://github.com/rust-lang/regex/issues/264
+mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0)));
+mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0)));
+
+// See: https://github.com/rust-lang/regex/issues/271
+mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8)));
diff --git a/tests/consistent.rs b/tests/consistent.rs
new file mode 100644
index 0000000..0f9ea53
--- /dev/null
+++ b/tests/consistent.rs
@@ -0,0 +1,241 @@
+use regex::internal::ExecBuilder;
+
+/// Given a regex, check if all of the backends produce the same
+/// results on a number of different inputs.
+///
+/// For now this just throws quickcheck at the problem, which
+/// is not very good because it only really tests half of the
+/// problem space. It is pretty unlikely that a random string
+/// will match any given regex, so this will probably just
+/// be checking that the different backends fail in the same
+/// way. This is still worthwhile to test, but is definitely not
+/// the whole story.
+///
+/// TODO(ethan): In order to cover the other half of the problem
+/// space, we should generate a random matching string by inspecting
+/// the AST of the input regex. The right way to do this probably
+/// involves adding a custom Arbitrary instance around a couple
+/// of newtypes. That way we can respect the quickcheck size hinting
+/// and shrinking and whatnot.
+pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
+    let standard_backends = vec![
+        (
+            "bounded_backtracking_re",
+            ExecBuilder::new(re)
+                .bounded_backtracking()
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "pikevm_re",
+            ExecBuilder::new(re)
+                .nfa()
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "default_re",
+            ExecBuilder::new(re)
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+    ];
+
+    let utf8bytes_backends = vec![
+        (
+            "bounded_backtracking_utf8bytes_re",
+            ExecBuilder::new(re)
+                .bounded_backtracking()
+                .bytes(true)
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "pikevm_utf8bytes_re",
+            ExecBuilder::new(re)
+                .nfa()
+                .bytes(true)
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "default_utf8bytes_re",
+            ExecBuilder::new(re)
+                .bytes(true)
+                .build()
+                .map(|exec| exec.into_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+    ];
+
+    let bytes_backends = vec![
+        (
+            "bounded_backtracking_bytes_re",
+            ExecBuilder::new(re)
+                .bounded_backtracking()
+                .only_utf8(false)
+                .build()
+                .map(|exec| exec.into_byte_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "pikevm_bytes_re",
+            ExecBuilder::new(re)
+                .nfa()
+                .only_utf8(false)
+                .build()
+                .map(|exec| exec.into_byte_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+        (
+            "default_bytes_re",
+            ExecBuilder::new(re)
+                .only_utf8(false)
+                .build()
+                .map(|exec| exec.into_byte_regex())
+                .map_err(|err| format!("{}", err))?,
+        ),
+    ];
+
+    Ok(string_checker::check_backends(&standard_backends)?
+        + string_checker::check_backends(&utf8bytes_backends)?
+        + bytes_checker::check_backends(&bytes_backends)?)
+}
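+
+// A hypothetical usage sketch (illustrative only; not one of the generated
+// tests): the helper can be driven directly with any pattern string.
+//
+//     #[test]
+//     fn date_pattern_is_consistent() {
+//         backends_are_consistent(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
+//     }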
+
+//
+// A consistency checker parameterized by the input type (&str or &[u8]).
+//
+
+macro_rules! checker {
+    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
+        mod $module_name {
+            use quickcheck;
+            use quickcheck::{Arbitrary, TestResult};
+
+            pub fn check_backends(
+                backends: &[(&str, $regex_type)],
+            ) -> Result<u64, String> {
+                let mut total_passed = 0;
+                for regex in backends[1..].iter() {
+                    total_passed += quickcheck_regex_eq(&backends[0], regex)?;
+                }
+
+                Ok(total_passed)
+            }
+
+            fn quickcheck_regex_eq(
+                &(name1, ref re1): &(&str, $regex_type),
+                &(name2, ref re2): &(&str, $regex_type),
+            ) -> Result<u64, String> {
+                quickcheck::QuickCheck::new()
+                    .quicktest(RegexEqualityTest::new(
+                        re1.clone(),
+                        re2.clone(),
+                    ))
+                    .map_err(|err| {
+                        format!(
+                            "{}(/{}/) and {}(/{}/) are inconsistent.\
+                             QuickCheck Err: {:?}",
+                            name1, re1, name2, re2, err
+                        )
+                    })
+            }
+
+            struct RegexEqualityTest {
+                re1: $regex_type,
+                re2: $regex_type,
+            }
+            impl RegexEqualityTest {
+                fn new(re1: $regex_type, re2: $regex_type) -> Self {
+                    RegexEqualityTest { re1: re1, re2: re2 }
+                }
+            }
+
+            impl quickcheck::Testable for RegexEqualityTest {
+                fn result<G: quickcheck::Gen>(
+                    &self,
+                    gen: &mut G,
+                ) -> TestResult {
+                    let input = $mk_input(gen);
+                    let input = &input;
+
+                    if self.re1.find(&input) != self.re2.find(input) {
+                        return TestResult::error(format!(
+                            "find mismatch input={:?}",
+                            input
+                        ));
+                    }
+
+                    let cap1 = self.re1.captures(input);
+                    let cap2 = self.re2.captures(input);
+                    match (cap1, cap2) {
+                        (None, None) => {}
+                        (Some(cap1), Some(cap2)) => {
+                            for (c1, c2) in cap1.iter().zip(cap2.iter()) {
+                                if c1 != c2 {
+                                    return TestResult::error(format!(
+                                        "captures mismatch input={:?}",
+                                        input
+                                    ));
+                                }
+                            }
+                        }
+                        _ => {
+                            return TestResult::error(format!(
+                                "captures mismatch input={:?}",
+                                input
+                            ))
+                        }
+                    }
+
+                    let fi1 = self.re1.find_iter(input);
+                    let fi2 = self.re2.find_iter(input);
+                    for (m1, m2) in fi1.zip(fi2) {
+                        if m1 != m2 {
+                            return TestResult::error(format!(
+                                "find_iter mismatch input={:?}",
+                                input
+                            ));
+                        }
+                    }
+
+                    let ci1 = self.re1.captures_iter(input);
+                    let ci2 = self.re2.captures_iter(input);
+                    for (cap1, cap2) in ci1.zip(ci2) {
+                        for (c1, c2) in cap1.iter().zip(cap2.iter()) {
+                            if c1 != c2 {
+                                return TestResult::error(format!(
+                                    "captures_iter mismatch input={:?}",
+                                    input
+                                ));
+                            }
+                        }
+                    }
+
+                    let s1 = self.re1.split(input);
+                    let s2 = self.re2.split(input);
+                    for (chunk1, chunk2) in s1.zip(s2) {
+                        if chunk1 != chunk2 {
+                            return TestResult::error(format!(
+                                "split mismatch input={:?}",
+                                input
+                            ));
+                        }
+                    }
+
+                    TestResult::from_bool(true)
+                }
+            }
+        } // mod
+    }; // rule case
+} // macro_rules!
+
+checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen));
+checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary(
+    gen
+));
diff --git a/tests/crates_regex.rs b/tests/crates_regex.rs
new file mode 100644
index 0000000..200ec27
--- /dev/null
+++ b/tests/crates_regex.rs
@@ -0,0 +1,3287 @@
+// DO NOT EDIT. Automatically generated by 'scripts/scrape_crates_io.py'
+// on 2018-06-20 09:56:32.820354.
+
+// autoshutdown-0.1.0: r"\s*(\d+)(\w)\s*"
+consistent!(autoshutdown_0, r"\s*(\d+)(\w)\s*");
+
+// epub-1.1.1: r"/"
+consistent!(epub_0, r"/");
+
+// rpi-info-0.2.0: "^Revision\t+: ([0-9a-fA-F]+)"
+consistent!(rpi_info_0, "^Revision\t+: ([0-9a-fA-F]+)");
+
+// rpi-info-0.2.0: "Serial\t+: ([0-9a-fA-F]+)"
+consistent!(rpi_info_1, "Serial\t+: ([0-9a-fA-F]+)");
+
+// pnet_macros-0.21.0: r"^u([0-9]+)(be|le|he)?$"
+consistent!(pnet_macros_0, r"^u([0-9]+)(be|le|he)?$");
+
+// iban_validate-1.0.3: r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$"
+consistent!(iban_validate_0, r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$");
+
+// markifier-0.1.0: r".*\[(?P<percent>.+)%.*\].*"
+consistent!(markifier_0, r".*\[(?P<percent>.+)%.*\].*");
+
+// mallumo-0.3.0: r"(#include) (\S*)(.*)"
+consistent!(mallumo_0, r"(#include) (\S*)(.*)");
+
+// mallumo-0.3.0: r"(ERROR: \d+:)(\d+)(: )(.+)"
+consistent!(mallumo_1, r"(ERROR: \d+:)(\d+)(: )(.+)");
+
+// mallumo-0.3.0: r"(\d+\()(\d+)(?:\) : )(.+)"
+consistent!(mallumo_2, r"(\d+\()(\d+)(?:\) : )(.+)");
+
+// magnet_more-0.0.1: r"(.+?)(\[.*?\])?"
+consistent!(magnet_more_0, r"(.+?)(\[.*?\])?");
+
+// magnet_app-0.0.1: r":(?P<k>[a-zA-Z_]+)"
+consistent!(magnet_app_0, r":(?P<k>[a-zA-Z_]+)");
+
+// yubibomb-0.2.0: r"^\d{6}(?:\s*,\s*\d{6})*$"
+consistent!(yubibomb_0, r"^\d{6}(?:\s*,\s*\d{6})*$");
+
+// multirust-rs-0.0.4: r"[\\/]([^\\/?]+)(\?.*)?$"
+consistent!(multirust_rs_0, r"[\\/]([^\\/?]+)(\?.*)?$");
+
+// hueclient-0.3.2: "\"[a-z]*\":null"
+consistent!(hueclient_0, "\"[a-z]*\":null");
+
+// hueclient-0.3.2: ",+"
+consistent!(hueclient_1, ",+");
+
+// hueclient-0.3.2: ",\\}"
+consistent!(hueclient_2, ",\\}");
+
+// hueclient-0.3.2: "\\{,"
+consistent!(hueclient_3, "\\{,");
+
+// aerial-0.1.0: r"[a-zA-Z_\$][a-zA-Z_0-9]*"
+consistent!(aerial_0, r"[a-zA-Z_\$][a-zA-Z_0-9]*");
+
+// aerial-0.1.0: r"thi[sng]+"
+consistent!(aerial_1, r"thi[sng]+");
+
+// rvue-0.1.0: r"(.+)\s+\((.+?)\)"
+consistent!(rvue_0, r"(.+)\s+\((.+?)\)");
+
+// rvue-0.1.0: r"([\d\.]+)\s*out\s*of\s*([\d\.]+)"
+consistent!(rvue_1, r"([\d\.]+)\s*out\s*of\s*([\d\.]+)");
+
+// rvue-0.1.0: r"^([\d\.]+)\s*(?:\(\))?$"
+consistent!(rvue_2, r"^([\d\.]+)\s*(?:\(\))?$");
+
+// rvue-0.1.0: r"([\d\.]+)\s*Points\s*Possible"
+consistent!(rvue_3, r"([\d\.]+)\s*Points\s*Possible");
+
+// rvue-0.1.0: r"([\d\.]+)\s*/\s*([\d\.]+)"
+consistent!(rvue_4, r"([\d\.]+)\s*/\s*([\d\.]+)");
+
+// rvsim-0.1.0: r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]"
+consistent!(rvsim_0, r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]");
+
+// nereon-0.1.4: "(.*[^\\\\])\\{\\}(.*)"
+consistent!(nereon_0, "(.*[^\\\\])\\{\\}(.*)");
+
+// next_episode-0.3.0: r"((?i)^(.+).s(\d+)e(\d+).*)$"
+consistent!(next_episode_0, r"((?i)^(.+).s(\d+)e(\d+).*)$");
+
+// migrant_lib-0.19.2: r"[^a-z0-9-]+"
+consistent!(migrant_lib_0, r"[^a-z0-9-]+");
+
+// migrant_lib-0.19.2: r"[0-9]{14}_[a-z0-9-]+"
+consistent!(migrant_lib_1, r"[0-9]{14}_[a-z0-9-]+");
+
+// migrant_lib-0.19.2: r"([0-9]{14}_)?[a-z0-9-]+"
+consistent!(migrant_lib_2, r"([0-9]{14}_)?[a-z0-9-]+");
+
+// minipre-0.2.0: "$_"
+consistent!(minipre_0, "$_");
+
+// minifier-0.0.13: r">\s+<"
+consistent!(minifier_0, r">\s+<");
+
+// minifier-0.0.13: r"\s{2,}|[\r\n]"
+consistent!(minifier_1, r"\s{2,}|[\r\n]");
+
+// minifier-0.0.13: r"<(style|script)[\w|\s].*?>"
+consistent!(minifier_2, r"<(style|script)[\w|\s].*?>");
+
+// minifier-0.0.13: "<!--(.|\n)*?-->"
+consistent!(minifier_3, "<!--(.|\n)*?-->");
+
+// minifier-0.0.13: r"<\w.*?>"
+consistent!(minifier_4, r"<\w.*?>");
+
+// minifier-0.0.13: r" \s+|\s +"
+consistent!(minifier_5, r" \s+|\s +");
+
+// minifier-0.0.13: r"\w\s+\w"
+consistent!(minifier_6, r"\w\s+\w");
+
+// minifier-0.0.13: r"'\s+>"
+consistent!(minifier_7, r"'\s+>");
+
+// minifier-0.0.13: r"\d\s+>"
+consistent!(minifier_8, r"\d\s+>");
+
+// ggp-rs-0.1.2: r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)"
+consistent!(ggp_rs_0, r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)");
+
+// ggp-rs-0.1.2: r"\((.*)\)."
+consistent!(ggp_rs_1, r"\((.*)\).");
+
+// poe-superfilter-0.2.0: "[A-Za-z0-9_]"
+consistent!(poe_superfilter_0, "[A-Za-z0-9_]");
+
+// poke-a-mango-0.5.0: r"(\d+)x(\d+)"
+consistent!(poke_a_mango_0, r"(\d+)x(\d+)");
+
+// pop3-rs-0.1.0: r"(?P<nmsg>\d+) (?P<size>\d+)"
+consistent!(pop3_rs_0, r"(?P<nmsg>\d+) (?P<size>\d+)");
+
+// pop3-rs-0.1.0: r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})"
+consistent!(pop3_rs_1, r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})");
+
+// pop3-rs-0.1.0: r"(<.*>)\r\n$"
+consistent!(pop3_rs_2, r"(<.*>)\r\n$");
+
+// pop3-rs-0.1.0: r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)"
+consistent!(pop3_rs_3, r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)");
+
+// pop3-1.0.6: r"^\.\r\n$"
+consistent!(pop3_0, r"^\.\r\n$");
+
+// pop3-1.0.6: r"\+OK(.*)"
+consistent!(pop3_1, r"\+OK(.*)");
+
+// pop3-1.0.6: r"-ERR(.*)"
+consistent!(pop3_2, r"-ERR(.*)");
+
+// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n"
+consistent!(pop3_3, r"\+OK (\d+) (\d+)\r\n");
+
+// pop3-1.0.6: r"(\d+) ([\x21-\x7e]+)\r\n"
+consistent!(pop3_4, r"(\d+) ([\x21-\x7e]+)\r\n");
+
+// pop3-1.0.6: r"\+OK (\d+) ([\x21-\x7e]+)\r\n"
+consistent!(pop3_5, r"\+OK (\d+) ([\x21-\x7e]+)\r\n");
+
+// pop3-1.0.6: r"(\d+) (\d+)\r\n"
+consistent!(pop3_6, r"(\d+) (\d+)\r\n");
+
+// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n"
+consistent!(pop3_7, r"\+OK (\d+) (\d+)\r\n");
+
+// polk-1.1.3: "github:(\\w+)/?(\\w+)?"
+consistent!(polk_0, "github:(\\w+)/?(\\w+)?");
+
+// geochunk-0.1.5: "^[0-9]{5}"
+consistent!(geochunk_0, "^[0-9]{5}");
+
+// generic-dns-update-1.1.4: r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))"
+consistent!(generic_dns_update_0, r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))");
+
+// generic-dns-update-1.1.4: r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"
+consistent!(generic_dns_update_1, r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))");
+
+// generic-dns-update-1.1.4: r"<value><string>([0-9.]*)</string></value>"
+consistent!(
+    generic_dns_update_2,
+    r"<value><string>([0-9.]*)</string></value>"
+);
+
+// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>"
+consistent!(generic_dns_update_3, r"<int>([0-9]+)</int>");
+
+// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>"
+consistent!(generic_dns_update_4, r"<int>([0-9]+)</int>");
+
+// generic-dns-update-1.1.4: r"<boolean>([0-1]*)</boolean>"
+consistent!(generic_dns_update_5, r"<boolean>([0-1]*)</boolean>");
+
+// generate-nix-pkg-0.3.0: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"
+consistent!(generate_nix_pkg_0, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?");
+
+// generate-nix-pkg-0.3.0: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?"
+consistent!(generate_nix_pkg_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?");
+
+// genact-0.6.0: r"arch/([a-z0-9_])+/"
+consistent!(genact_0, r"arch/([a-z0-9_])+/");
+
+// genact-0.6.0: r"arch/([a-z0-9_])+/"
+consistent!(genact_1, r"arch/([a-z0-9_])+/");
+
+// cron_rs-0.1.6: r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$"
+consistent!(
+    cron_rs_0,
+    r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$"
+);
+
+// systemfd-0.3.0: r"^([a-zA-Z]+)::(.+)$"
+consistent!(systemfd_0, r"^([a-zA-Z]+)::(.+)$");
+
+// symbolic-debuginfo-5.0.2: "__?hidden#\\d+_"
+consistent!(symbolic_debuginfo_0, "__?hidden#\\d+_");
+
+// symbolic-minidump-5.0.2: r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$"
+consistent!(symbolic_minidump_0, r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"
+consistent!(graphql_idl_parser_0, "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+");
+
+// graphql-idl-parser-0.1.1: "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"
+consistent!(graphql_idl_parser_1, "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+");
+
+// graphql-idl-parser-0.1.1: "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*"
+consistent!(graphql_idl_parser_2, "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*");
+
+// graphql-idl-parser-0.1.1: "^(?u:!)"
+consistent!(graphql_idl_parser_3, "^(?u:!)");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\()"
+consistent!(graphql_idl_parser_4, "^(?u:\\()");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\))"
+consistent!(graphql_idl_parser_5, "^(?u:\\))");
+
+// graphql-idl-parser-0.1.1: "^(?u:,)"
+consistent!(graphql_idl_parser_6, "^(?u:,)");
+
+// graphql-idl-parser-0.1.1: "^(?u::)"
+consistent!(graphql_idl_parser_7, "^(?u::)");
+
+// graphql-idl-parser-0.1.1: "^(?u:@)"
+consistent!(graphql_idl_parser_8, "^(?u:@)");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\[)"
+consistent!(graphql_idl_parser_9, "^(?u:\\[)");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\])"
+consistent!(graphql_idl_parser_10, "^(?u:\\])");
+
+// graphql-idl-parser-0.1.1: "^(?u:enum)"
+consistent!(graphql_idl_parser_11, "^(?u:enum)");
+
+// graphql-idl-parser-0.1.1: "^(?u:implements)"
+consistent!(graphql_idl_parser_12, "^(?u:implements)");
+
+// graphql-idl-parser-0.1.1: "^(?u:input)"
+consistent!(graphql_idl_parser_13, "^(?u:input)");
+
+// graphql-idl-parser-0.1.1: "^(?u:interface)"
+consistent!(graphql_idl_parser_14, "^(?u:interface)");
+
+// graphql-idl-parser-0.1.1: "^(?u:scalar)"
+consistent!(graphql_idl_parser_15, "^(?u:scalar)");
+
+// graphql-idl-parser-0.1.1: "^(?u:type)"
+consistent!(graphql_idl_parser_16, "^(?u:type)");
+
+// graphql-idl-parser-0.1.1: "^(?u:union)"
+consistent!(graphql_idl_parser_17, "^(?u:union)");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\{)"
+consistent!(graphql_idl_parser_18, "^(?u:\\{)");
+
+// graphql-idl-parser-0.1.1: "^(?u:\\})"
+consistent!(graphql_idl_parser_19, "^(?u:\\})");
+
+// grimoire-0.1.0: r"(?s)/\*(?P<config>.*?)\*/"
+consistent!(grimoire_0, r"(?s)/\*(?P<config>.*?)\*/");
+
+// phonenumber-0.2.0+8.9.0: r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?"
+consistent!(phonenumber_0, r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?");
+
+// phonenumber-0.2.0+8.9.0: r"[, \[\]]"
+consistent!(phonenumber_1, r"[, \[\]]");
+
+// phonenumber-0.2.0+8.9.0: r"[\\/] *x"
+consistent!(phonenumber_2, r"[\\/] *x");
+
+// phonenumber-0.2.0+8.9.0: r"[[\P{N}&&\P{L}]&&[^#]]+$"
+consistent!(phonenumber_3, r"[[\P{N}&&\P{L}]&&[^#]]+$");
+
+// phonenumber-0.2.0+8.9.0: r"(?:.*?[A-Za-z]){3}.*"
+consistent!(phonenumber_4, r"(?:.*?[A-Za-z]){3}.*");
+
+// phonenumber-0.2.0+8.9.0: r"(\D+)"
+consistent!(phonenumber_5, r"(\D+)");
+
+// phonenumber-0.2.0+8.9.0: r"(\$\d)"
+consistent!(phonenumber_6, r"(\$\d)");
+
+// phonenumber-0.2.0+8.9.0: r"\(?\$1\)?"
+consistent!(phonenumber_7, r"\(?\$1\)?");
+
+// phone_number-0.1.0: r"\D"
+consistent!(phone_number_0, r"\D");
+
+// phone_number-0.1.0: r"^0+"
+consistent!(phone_number_1, r"^0+");
+
+// phone_number-0.1.0: r"^89"
+consistent!(phone_number_2, r"^89");
+
+// phone_number-0.1.0: r"^8+"
+consistent!(phone_number_3, r"^8+");
+
+// phile-0.1.4: r"^ *(\^_*\^) *$"
+consistent!(phile_0, r"^ *(\^_*\^) *$");
+
+// phile-0.1.4: r"^[_\p{XID_Start}]$"
+consistent!(phile_1, r"^[_\p{XID_Start}]$");
+
+// phile-0.1.4: r"^\p{XID_Continue}$"
+consistent!(phile_2, r"^\p{XID_Continue}$");
+
+// uritemplate-0.1.2: "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])"
+consistent!(uritemplate_0, "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])");
+
+// urdf-rs-0.4.2: "^package://(\\w+)/"
+consistent!(urdf_rs_0, "^package://(\\w+)/");
+
+// url-match-0.1.7: r"(?P<key>[?&.])"
+consistent!(url_match_0, r"(?P<key>[?&.])");
+
+// url-match-0.1.7: r":(?P<key>[a-zA-Z0-9_-]+)"
+consistent!(url_match_1, r":(?P<key>[a-zA-Z0-9_-]+)");
+
+// tsm-sys-0.1.0: r"hello world"
+consistent!(tsm_sys_0, r"hello world");
+
+// deb-version-0.1.0: "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$"
+consistent!(deb_version_0, "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$");
+
+// debcargo-2.1.0: r"^(?i)(a|an|the)\s+"
+consistent!(debcargo_0, r"^(?i)(a|an|the)\s+");
+
+// debcargo-2.1.0: r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+"
+consistent!(
+    debcargo_1,
+    r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+"
+);
+
+// feaders-0.2.0: r"^.*\.h$"
+consistent!(feaders_0, r"^.*\.h$");
+
+// feaders-0.2.0: r"^.*\.c$"
+consistent!(feaders_1, r"^.*\.c$");
+
+// feaders-0.2.0: r"^.*\.hpp$"
+consistent!(feaders_2, r"^.*\.hpp$");
+
+// feaders-0.2.0: r"^.*\.cc$"
+consistent!(feaders_3, r"^.*\.cc$");
+
+// feaders-0.2.0: r"^.*\.cpp$"
+consistent!(feaders_4, r"^.*\.cpp$");
+
+// hyperscan-0.1.6: r"CPtr\(\w+\)"
+consistent!(hyperscan_0, r"CPtr\(\w+\)");
+
+// hyperscan-0.1.6: r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$"
+consistent!(
+    hyperscan_1,
+    r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$"
+);
+
+// hyperscan-0.1.6: r"RawDatabase<Block>\{db: \w+\}"
+consistent!(hyperscan_2, r"RawDatabase<Block>\{db: \w+\}");
+
+// hyperscan-0.1.6: r"RawSerializedDatabase\{p: \w+, len: \d+\}"
+consistent!(hyperscan_3, r"RawSerializedDatabase\{p: \w+, len: \d+\}");
+
+// ucd-parse-0.1.1: r"[0-9A-F]+"
+consistent!(ucd_parse_0, r"[0-9A-F]+");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_0, r".*");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_1, r".*");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_2, r".*");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_3, r".*");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_4, r".*");
+
+// afsort-0.2.0: r".*"
+consistent!(afsort_5, r".*");
+
+// afsort-0.2.0: r"^[a-z]+$"
+consistent!(afsort_6, r"^[a-z]+$");
+
+// afsort-0.2.0: r"^[a-z]+$"
+consistent!(afsort_7, r"^[a-z]+$");
+
+// tin-summer-1.21.4: r"(\.git|\.pijul|_darcs|\.hg)$"
+consistent!(tin_summer_0, r"(\.git|\.pijul|_darcs|\.hg)$");
+
+// tin-drummer-1.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"
+consistent!(tin_drummer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$");
+
+// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$"
+consistent!(
+    tin_drummer_1,
+    r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$"
+);
+
+// tin-drummer-1.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"
+consistent!(tin_drummer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$");
+
+// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|\.js)$"
+consistent!(tin_drummer_3, r".*?\.(stats|conf|h|out|cache.*|\.js)$");
+
+// tin-drummer-1.0.1: r"(\.git|\.pijul|_darcs|\.hg)$"
+consistent!(tin_drummer_4, r"(\.git|\.pijul|_darcs|\.hg)$");
+
+// tin-drummer-1.0.1: r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$"
+consistent!(
+    tin_drummer_5,
+    r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$"
+);
+
+// tin-drummer-1.0.1: r".*?\.(ibc)$"
+consistent!(tin_drummer_6, r".*?\.(ibc)$");
+
+// tin-drummer-1.0.1: r"\.stack-work|dist-newstyle"
+consistent!(tin_drummer_7, r"\.stack-work|dist-newstyle");
+
+// timmy-0.3.0: r"_NET_WM_PID\(CARDINAL\) = (\d+)"
+consistent!(timmy_0, r"_NET_WM_PID\(CARDINAL\) = (\d+)");
+
+// timmy-0.3.0: r"today|yesterday|now"
+consistent!(timmy_1, r"today|yesterday|now");
+
+// timmy-0.3.0: r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?"
+consistent!(
+    timmy_2,
+    r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?"
+);
+
+// timmy-0.3.0: r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)"
+consistent!(timmy_3, r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)");
+
+// timmy-0.3.0: r"(?P<hr>\d{2}):(?P<mins>\d{2})"
+consistent!(timmy_4, r"(?P<hr>\d{2}):(?P<mins>\d{2})");
+
+// tinfo-0.5.0: r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?"
+consistent!(
+    tinfo_0,
+    r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?"
+);
+
+// tinfo-0.5.0: r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]"
+consistent!(tinfo_1, r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]");
+
+// timespan-0.0.4: r"(?:\\\{start\\\}|\\\{end\\\})"
+consistent!(timespan_0, r"(?:\\\{start\\\}|\\\{end\\\})");
+
+// timespan-0.0.4: r"(.*)\s+-\s+(.*)"
+consistent!(timespan_1, r"(.*)\s+-\s+(.*)");
+
+// timespan-0.0.4: r"(.*)\s+(\w+)$"
+consistent!(timespan_2, r"(.*)\s+(\w+)$");
+
+// timespan-0.0.4: r"(.*)\s+(\w+)$"
+consistent!(timespan_3, r"(.*)\s+(\w+)$");
+
+// timespan-0.0.4: r"(.*)\s+-\s+(.*)"
+consistent!(timespan_4, r"(.*)\s+-\s+(.*)");
+
+// titlecase-0.10.0: r"[[:lower:]]"
+consistent!(titlecase_0, r"[[:lower:]]");
+
+// tight-0.1.3: r"^\d+ (day|week|month|year)s?$"
+consistent!(tight_0, r"^\d+ (day|week|month|year)s?$");
+
+// tight-0.1.3: r"^\d+ (day|week|month|year)s?$"
+consistent!(tight_1, r"^\d+ (day|week|month|year)s?$");
+
+// yaml-0.2.1: r"^[-+]?(0|[1-9][0-9_]*)$"
+consistent!(yaml_0, r"^[-+]?(0|[1-9][0-9_]*)$");
+
+// yaml-0.2.1: r"^([-+]?)0o?([0-7_]+)$"
+consistent!(yaml_1, r"^([-+]?)0o?([0-7_]+)$");
+
+// yaml-0.2.1: r"^([-+]?)0x([0-9a-fA-F_]+)$"
+consistent!(yaml_2, r"^([-+]?)0x([0-9a-fA-F_]+)$");
+
+// yaml-0.2.1: r"^([-+]?)0b([0-1_]+)$"
+consistent!(yaml_3, r"^([-+]?)0b([0-1_]+)$");
+
+// yaml-0.2.1: r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"
+consistent!(
+    yaml_4,
+    r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$"
+);
+
+// yaml-0.2.1: r"^[+]?(\.inf|\.Inf|\.INF)$"
+consistent!(yaml_5, r"^[+]?(\.inf|\.Inf|\.INF)$");
+
+// yaml-0.2.1: r"^-(\.inf|\.Inf|\.INF)$"
+consistent!(yaml_6, r"^-(\.inf|\.Inf|\.INF)$");
+
+// yaml-0.2.1: r"^(\.nan|\.NaN|\.NAN)$"
+consistent!(yaml_7, r"^(\.nan|\.NaN|\.NAN)$");
+
+// yaml-0.2.1: r"^(null|Null|NULL|~)$"
+consistent!(yaml_8, r"^(null|Null|NULL|~)$");
+
+// yaml-0.2.1: r"^(true|True|TRUE|yes|Yes|YES)$"
+consistent!(yaml_9, r"^(true|True|TRUE|yes|Yes|YES)$");
+
+// yaml-0.2.1: r"^(false|False|FALSE|no|No|NO)$"
+consistent!(yaml_10, r"^(false|False|FALSE|no|No|NO)$");
+
+// kefia-0.1.0: r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$"
+consistent!(kefia_0, r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$");
+
+// risp-0.7.0: "^(\\s+|;.*?(\n|$))+"
+consistent!(risp_0, "^(\\s+|;.*?(\n|$))+");
+
+// risp-0.7.0: "^\".*?\""
+consistent!(risp_1, "^\".*?\"");
+
+// risp-0.7.0: r"^[^\s\{\}()\[\]]+"
+consistent!(risp_2, r"^[^\s\{\}()\[\]]+");
+
+// risp-0.7.0: r"^-?\d+"
+consistent!(risp_3, r"^-?\d+");
+
+// ripgrep-0.8.1: "^([0-9]+)([KMG])?$"
+consistent!(ripgrep_0, "^([0-9]+)([KMG])?$");
+
+// riquid-0.0.1: r"^\w+"
+consistent!(riquid_0, r"^\w+");
+
+// riquid-0.0.1: r"^\d+"
+consistent!(riquid_1, r"^\d+");
+
+// recursive_disassembler-2.1.2: r"\A(0x)?([a-fA-F0-9]+)\z"
+consistent!(recursive_disassembler_0, r"\A(0x)?([a-fA-F0-9]+)\z");
+
+// remake-0.1.0: r"^[a-zA-Z_][a-zA-Z0-9_]*"
+consistent!(remake_0, r"^[a-zA-Z_][a-zA-Z0-9_]*");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_0, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_1, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_2, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_3, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_4, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_5, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)"
+consistent!(regex_decode_6, r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_7, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"
+consistent!(regex_decode_8, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"
+consistent!(regex_decode_9, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"
+consistent!(regex_decode_10, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"
+consistent!(regex_decode_11, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"
+consistent!(regex_decode_12, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)");
+
+// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"
+consistent!(regex_decode_13, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)");
+
+// regex-cache-0.2.0: "[0-9]{3}-[0-9]{3}-[0-9]{4}"
+consistent!(regex_cache_0, "[0-9]{3}-[0-9]{3}-[0-9]{4}");
+
+// regex-cache-0.2.0: r"^\d+$"
+consistent!(regex_cache_1, r"^\d+$");
+
+// regex-cache-0.2.0: r"^[a-z]+$"
+consistent!(regex_cache_2, r"^[a-z]+$");
+
+// regex-cache-0.2.0: r"^\d+$"
+consistent!(regex_cache_3, r"^\d+$");
+
+// regex-cache-0.2.0: r"^\d+$"
+consistent!(regex_cache_4, r"^\d+$");
+
+// regex_dfa-0.5.0: r"\d{4}-\d{2}-\d{2}"
+consistent!(regex_dfa_0, r"\d{4}-\d{2}-\d{2}");
+
+// reaper-2.0.0: r"^[0-9\p{L} _\\.]{3,16}$"
+consistent!(reaper_0, r"^[0-9\p{L} _\\.]{3,16}$");
+
+// retdec-0.1.0: r"^attachment; filename=(.+)$"
+consistent!(retdec_0, r"^attachment; filename=(.+)$");
+
+// renvsubst-0.1.2: r"(\\)(?P<head>\$[0-9A-Za-z_{])"
+consistent!(renvsubst_0, r"(\\)(?P<head>\$[0-9A-Za-z_{])");
+
+// renvsubst-0.1.2: r"\$([[:word:]]+)"
+consistent!(renvsubst_1, r"\$([[:word:]]+)");
+
+// renvsubst-0.1.2: r"\$\{([[:word:]]+)\}"
+consistent!(renvsubst_2, r"\$\{([[:word:]]+)\}");
+
+// rexpect-0.3.0: r"'[a-z]+'"
+consistent!(rexpect_0, r"'[a-z]+'");
+
+// rexpect-0.3.0: r"^\d{4}-\d{2}-\d{2}$"
+consistent!(rexpect_1, r"^\d{4}-\d{2}-\d{2}$");
+
+// rexpect-0.3.0: r"-\d{2}-"
+consistent!(rexpect_2, r"-\d{2}-");
+
+// luther-0.1.0: "^a(b|c)c*$"
+consistent!(luther_0, "^a(b|c)c*$");
+
+// little_boxes-1.6.0: r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]"
+consistent!(little_boxes_0, r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]");
+
+// libimagentrytag-0.8.0: "^[a-zA-Z]([a-zA-Z0-9_-]*)$"
+consistent!(libimagentrytag_0, "^[a-zA-Z]([a-zA-Z0-9_-]*)$");
+
+// libimaginteraction-0.8.0: r"^[Yy](\n?)$"
+consistent!(libimaginteraction_0, r"^[Yy](\n?)$");
+
+// libimaginteraction-0.8.0: r"^[Nn](\n?)$"
+consistent!(libimaginteraction_1, r"^[Nn](\n?)$");
+
+// libimagutil-0.8.0: "^(?P<KEY>([^=]*))=(.*)$"
+consistent!(libimagutil_0, "^(?P<KEY>([^=]*))=(.*)$");
+
+// libimagutil-0.8.0: "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$"
+consistent!(libimagutil_1, "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$");
+
+// linux_ip-0.1.0: r"\s+"
+consistent!(linux_ip_0, r"\s+");
+
+// linux_ip-0.1.0: r"\s*[\n\r]+\s*"
+consistent!(linux_ip_1, r"\s*[\n\r]+\s*");
+
+// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"
+consistent!(linux_ip_2, r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$");
+
+// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"
+consistent!(linux_ip_3, r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$");
+
+// linux_ip-0.1.0: r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$"
+consistent!(linux_ip_4, r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$");
+
+// linux_ip-0.1.0: r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$"
+consistent!(
+    linux_ip_5,
+    r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$"
+);
+
+// linux_ip-0.1.0: r"\s*[\n\r]+\s*"
+consistent!(linux_ip_6, r"\s*[\n\r]+\s*");
+
+// linux_ip-0.1.0: r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$"
+consistent!(linux_ip_7, r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$");
+
+// linux_ip-0.1.0: r"\s*link/ether\s+([a-f0-9:]+)\s+.*"
+consistent!(linux_ip_8, r"\s*link/ether\s+([a-f0-9:]+)\s+.*");
+
+// linux_ip-0.1.0: r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*"
+consistent!(linux_ip_9, r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*");
+
+// linky-0.1.4: r"[^\w -]"
+consistent!(linky_0, r"[^\w -]");
+
+// linky-0.1.4: r"^(.*):(\d+): [^ ]* ([^ ]*)$"
+consistent!(linky_1, r"^(.*):(\d+): [^ ]* ([^ ]*)$");
+
+// limonite-0.2.1: r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$"
+consistent!(limonite_0, r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$");
+
+// process-queue-0.1.1: r"^[a-zA-Z]+$"
+consistent!(process_queue_0, r"^[a-zA-Z]+$");
+
+// pronghorn-0.1.2: r"^\{([a-zA-Z_]+)\}$"
+consistent!(pronghorn_0, r"^\{([a-zA-Z_]+)\}$");
+
+// protocol-ftp-client-0.1.1: "(?m:^(\\d{3}) (.+)\r$)"
+consistent!(protocol_ftp_client_0, "(?m:^(\\d{3}) (.+)\r$)");
+
+// protocol-ftp-client-0.1.1: "\"(.+)\""
+consistent!(protocol_ftp_client_1, "\"(.+)\"");
+
+// protocol-ftp-client-0.1.1: "(\\w+) [Tt]ype: (\\w+)"
+consistent!(protocol_ftp_client_2, "(\\w+) [Tt]ype: (\\w+)");
+
+// protocol-ftp-client-0.1.1: "(?m:^(\\d{3})-.+\r$)"
+consistent!(protocol_ftp_client_3, "(?m:^(\\d{3})-.+\r$)");
+
+// protocol-ftp-client-0.1.1: "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)"
+consistent!(
+    protocol_ftp_client_4,
+    "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)"
+);
+
+// protocol-ftp-client-0.1.1: "(?m:^(.+)\r$)"
+consistent!(protocol_ftp_client_5, "(?m:^(.+)\r$)");
+
+// protocol-ftp-client-0.1.1: "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$"
+consistent!(
+    protocol_ftp_client_6,
+    "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$"
+);
+
+// article-date-extractor-0.1.1: r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})"
+consistent!(article_date_extractor_0, r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})");
+
+// article-date-extractor-0.1.1: r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date"
+consistent!(
+    article_date_extractor_1,
+    r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date"
+);
+
+// arthas_plugin-0.1.1: r"type\((.*)\)"
+consistent!(arthas_plugin_0, r"type\((.*)\)");
+
+// arthas_plugin-0.1.1: r"Vec<(.*)>"
+consistent!(arthas_plugin_1, r"Vec<(.*)>");
+
+// arthas_plugin-0.1.1: r"Option<(.*)>"
+consistent!(arthas_plugin_2, r"Option<(.*)>");
+
+// arthas_plugin-0.1.1: r"HashMap<[a-z0-9A-Z]+, *(.*)>"
+consistent!(arthas_plugin_3, r"HashMap<[a-z0-9A-Z]+, *(.*)>");
+
+// arthas_derive-0.1.0: "Vec *< *(.*) *>"
+consistent!(arthas_derive_0, "Vec *< *(.*) *>");
+
+// arthas_derive-0.1.0: r"Option *< *(.*) *>"
+consistent!(arthas_derive_1, r"Option *< *(.*) *>");
+
+// arthas_derive-0.1.0: r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>"
+consistent!(arthas_derive_2, r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>");
+
+// arpabet-0.2.0: r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$"
+consistent!(arpabet_0, r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$");
+
+// arpabet-0.2.0: r"^;;;\s+"
+consistent!(arpabet_1, r"^;;;\s+");
+
+// glossy_codegen-0.2.0: r"/\*.*?\*/|//.*"
+consistent!(glossy_codegen_0, r"/\*.*?\*/|//.*");
+
+// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$"
+consistent!(glossy_codegen_1, "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$");
+
+// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$"
+consistent!(glossy_codegen_2, "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$");
+
+// glossy_codegen-0.2.0: r"^\s*#\s*version\s+(\d+)"
+consistent!(glossy_codegen_3, r"^\s*#\s*version\s+(\d+)");
+
+// glossy_codegen-0.2.0: r"^\s*$"
+consistent!(glossy_codegen_4, r"^\s*$");
+
+// gluster-1.0.1: r"(?P<addr>via \S+)"
+consistent!(gluster_0, r"(?P<addr>via \S+)");
+
+// gluster-1.0.1: r"(?P<src>src \S+)"
+consistent!(gluster_1, r"(?P<src>src \S+)");
+
+// gl_helpers-0.1.7: r"(.*)\[\d+\]"
+consistent!(gl_helpers_0, r"(.*)\[\d+\]");
+
+// gl_helpers-0.1.7: r"(\d+).(\d+)"
+consistent!(gl_helpers_1, r"(\d+).(\d+)");
+
+// glr-parser-0.0.1: r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])"
+consistent!(glr_parser_0, r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])");
+
+// glr-parser-0.0.1: r"^\w+$"
+consistent!(glr_parser_1, r"^\w+$");
+
+// glr-parser-0.0.1: "'[^']+'"
+consistent!(glr_parser_2, "'[^']+'");
+
+// hoodlum-0.5.0: r"(?m)//.*"
+consistent!(hoodlum_0, r"(?m)//.*");
+
+// form-checker-0.2.2: r"^1\d{10}$"
+consistent!(form_checker_0, r"^1\d{10}$");
+
+// form-checker-0.2.2: r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$"
+consistent!(form_checker_1, r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$");
+
+// wikibase-0.2.0: r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)"
+consistent!(wikibase_0, r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)");
+
+// wifiscanner-0.3.6: r"Cell [0-9]{2,} - Address:"
+consistent!(wifiscanner_0, r"Cell [0-9]{2,} - Address:");
+
+// wifiscanner-0.3.6: r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}"
+consistent!(
+    wifiscanner_1,
+    r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}"
+);
+
+// wifiscanner-0.3.6: r"Signal level=(\d+)/100"
+consistent!(wifiscanner_2, r"Signal level=(\d+)/100");
+
+// bbcode-1.0.2: r"(?s)\[b\](.*?)\[/b\]"
+consistent!(bbcode_0, r"(?s)\[b\](.*?)\[/b\]");
+
+// bbcode-1.0.2: r"(?s)\[i\](.*?)\[/i\]"
+consistent!(bbcode_1, r"(?s)\[i\](.*?)\[/i\]");
+
+// bbcode-1.0.2: r"(?s)\[u\](.*?)\[/u\]"
+consistent!(bbcode_2, r"(?s)\[u\](.*?)\[/u\]");
+
+// bbcode-1.0.2: r"(?s)\[s\](.*?)\[/s\]"
+consistent!(bbcode_3, r"(?s)\[s\](.*?)\[/s\]");
+
+// bbcode-1.0.2: r"(?s)\[size=(\d+)](.*?)\[/size\]"
+consistent!(bbcode_4, r"(?s)\[size=(\d+)](.*?)\[/size\]");
+
+// bbcode-1.0.2: r"(?s)\[color=(.+)](.*?)\[/color\]"
+consistent!(bbcode_5, r"(?s)\[color=(.+)](.*?)\[/color\]");
+
+// bbcode-1.0.2: r"(?s)\[center\](.*?)\[/center\]"
+consistent!(bbcode_6, r"(?s)\[center\](.*?)\[/center\]");
+
+// bbcode-1.0.2: r"(?s)\[left\](.*?)\[/left\]"
+consistent!(bbcode_7, r"(?s)\[left\](.*?)\[/left\]");
+
+// bbcode-1.0.2: r"(?s)\[right\](.*?)\[/right\]"
+consistent!(bbcode_8, r"(?s)\[right\](.*?)\[/right\]");
+
+// bbcode-1.0.2: r"(?s)\[table\](.*?)\[/table\]"
+consistent!(bbcode_9, r"(?s)\[table\](.*?)\[/table\]");
+
+// bbcode-1.0.2: r"(?s)\[td\](.*?)\[/td\]"
+consistent!(bbcode_10, r"(?s)\[td\](.*?)\[/td\]");
+
+// bbcode-1.0.2: r"(?s)\[tr\](.*?)\[/tr\]"
+consistent!(bbcode_11, r"(?s)\[tr\](.*?)\[/tr\]");
+
+// bbcode-1.0.2: r"(?s)\[th\](.*?)\[/th\]"
+consistent!(bbcode_12, r"(?s)\[th\](.*?)\[/th\]");
+
+// bbcode-1.0.2: r"(?s)\[url\](.*?)\[/url\]"
+consistent!(bbcode_13, r"(?s)\[url\](.*?)\[/url\]");
+
+// bbcode-1.0.2: r"(?s)\[url=(.+)\](.*?)\[/url\]"
+consistent!(bbcode_14, r"(?s)\[url=(.+)\](.*?)\[/url\]");
+
+// bbcode-1.0.2: r"(?s)\[quote\](.*?)\[/quote\]"
+consistent!(bbcode_15, r"(?s)\[quote\](.*?)\[/quote\]");
+
+// bbcode-1.0.2: r"(?s)\[quote=(.+)\](.*?)\[/quote\]"
+consistent!(bbcode_16, r"(?s)\[quote=(.+)\](.*?)\[/quote\]");
+
+// bbcode-1.0.2: r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]"
+consistent!(bbcode_17, r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]");
+
+// bbcode-1.0.2: r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]"
+consistent!(bbcode_18, r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]");
+
+// bbcode-1.0.2: r"(?s)\[img(\b.*)?\](.*?)\[/img\]"
+consistent!(bbcode_19, r"(?s)\[img(\b.*)?\](.*?)\[/img\]");
+
+// bbcode-1.0.2: r"(?s)\[ol\](.*?)\[/ol\]"
+consistent!(bbcode_20, r"(?s)\[ol\](.*?)\[/ol\]");
+
+// bbcode-1.0.2: r"(?s)\[ul\](.*?)\[/ul\]"
+consistent!(bbcode_21, r"(?s)\[ul\](.*?)\[/ul\]");
+
+// bbcode-1.0.2: r"(?s)\[list\](.*?)\[/list\]"
+consistent!(bbcode_22, r"(?s)\[list\](.*?)\[/list\]");
+
+// bbcode-1.0.2: r"(?s)\[youtube\](.*?)\[/youtube\]"
+consistent!(bbcode_23, r"(?s)\[youtube\](.*?)\[/youtube\]");
+
+// bbcode-1.0.2: r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]"
+consistent!(bbcode_24, r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]");
+
+// bbcode-1.0.2: r"(?s)\[li\](.*?)\[/li\]"
+consistent!(bbcode_25, r"(?s)\[li\](.*?)\[/li\]");
+
+// block-utils-0.5.0: r"loop\d+"
+consistent!(block_utils_0, r"loop\d+");
+
+// block-utils-0.5.0: r"ram\d+"
+consistent!(block_utils_1, r"ram\d+");
+
+// block-utils-0.5.0: r"md\d+"
+consistent!(block_utils_2, r"md\d+");
+
+// kvvliveapi-0.1.0: r"^([1-9]) min$"
+consistent!(kvvliveapi_0, r"^([1-9]) min$");
+
+// rfc822_sanitizer-0.3.3: r"(\d{2}):(\d{2}):(\d{2})"
+consistent!(rfc822_sanitizer_0, r"(\d{2}):(\d{2}):(\d{2})");
+
+// rfc822_sanitizer-0.3.3: r"(\d{1,2}):(\d{1,2}):(\d{1,2})"
+consistent!(rfc822_sanitizer_1, r"(\d{1,2}):(\d{1,2}):(\d{1,2})");
+
+// faker-0.0.4: r"[2-9]"
+consistent!(faker_0, r"[2-9]");
+
+// faker-0.0.4: r"[1-9]"
+consistent!(faker_1, r"[1-9]");
+
+// faker-0.0.4: r"[0-9]"
+consistent!(faker_2, r"[0-9]");
+
+// faker-0.0.4: r"\d{10}"
+consistent!(faker_3, r"\d{10}");
+
+// faker-0.0.4: r"\d{1}"
+consistent!(faker_4, r"\d{1}");
+
+// faker-0.0.4: r"^\w+"
+consistent!(faker_5, r"^\w+");
+
+// faker-0.0.4: r"^\w+"
+consistent!(faker_6, r"^\w+");
+
+// faker-0.0.4: r"^(\w+\.? ?){2,3}$"
+consistent!(faker_7, r"^(\w+\.? ?){2,3}$");
+
+// faker-0.0.4: r"^[A-Z][a-z]+\.?$"
+consistent!(faker_8, r"^[A-Z][a-z]+\.?$");
+
+// faker-0.0.4: r"^[A-Z][A-Za-z]*\.?$"
+consistent!(faker_9, r"^[A-Z][A-Za-z]*\.?$");
+
+// faker-0.0.4: r"http://lorempixel.com/100/100/\w+"
+consistent!(faker_10, r"http://lorempixel.com/100/100/\w+");
+
+// faker-0.0.4: r"http://lorempixel.com/100/100/cats"
+consistent!(faker_11, r"http://lorempixel.com/100/100/cats");
+
+// fancy-regex-0.1.0: "(?i:ß)"
+consistent!(fancy_regex_0, "(?i:ß)");
+
+// fancy-regex-0.1.0: "(?i:\\x{0587})"
+consistent!(fancy_regex_1, "(?i:\\x{0587})");
+
+// fancy-regex-0.1.0: "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})"
+consistent!(fancy_regex_2, "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})");
+
+// fancy-prompt-0.1.5: r"/([^/])[^/]+/"
+consistent!(fancy_prompt_0, r"/([^/])[^/]+/");
+
+// fancy-prompt-0.1.5: r"^([^:]+):.*?(?::([^:]+))?$"
+consistent!(fancy_prompt_1, r"^([^:]+):.*?(?::([^:]+))?$");
+
+// fanta-0.2.0: r"^(/?__\w+__)/(.*)"
+consistent!(fanta_0, r"^(/?__\w+__)/(.*)");
+
+// fanta-cli-0.1.1: r"(.)([A-Z])"
+consistent!(fanta_cli_0, r"(.)([A-Z])");
+
+// fanta-cli-0.1.1: "\\{:[^\\s]+\\}"
+consistent!(fanta_cli_1, "\\{:[^\\s]+\\}");
+
+// amethyst_tools-0.7.1: "(?P<last>[^\r])\n"
+consistent!(amethyst_tools_0, "(?P<last>[^\r])\n");
+
+// amigo-0.3.1: r"^-?\d+(\.\d)?"
+consistent!(amigo_0, r"^-?\d+(\.\d)?");
+
+// amigo-0.3.1: r"^[a-zA-Z_]+[\w-]*[!?_]?"
+consistent!(amigo_1, r"^[a-zA-Z_]+[\w-]*[!?_]?");
+
+// amigo-0.3.1: r"^\("
+consistent!(amigo_2, r"^\(");
+
+// amigo-0.3.1: r"^\)"
+consistent!(amigo_3, r"^\)");
+
+// amigo-0.3.1: r"^\s+"
+consistent!(amigo_4, r"^\s+");
+
+// ethcore-logger-1.12.0: "\x1b\\[[^m]+m"
+consistent!(ethcore_logger_0, "\x1b\\[[^m]+m");
+
+// dash2html-1.0.1: r"__.*?__"
+consistent!(dash2html_0, r"__.*?__");
+
+// dash2html-1.0.1: r"(?i)@(?:time|clipboard|cursor|date)"
+consistent!(dash2html_1, r"(?i)@(?:time|clipboard|cursor|date)");
+
+// os_type-2.0.0: r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$"
+consistent!(os_type_0, r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$");
+
+// os_type-2.0.0: r"ProductName:\s([\w\s]+)\n"
+consistent!(os_type_1, r"ProductName:\s([\w\s]+)\n");
+
+// os_type-2.0.0: r"ProductVersion:\s(\w+\.\w+\.\w+)"
+consistent!(os_type_2, r"ProductVersion:\s(\w+\.\w+\.\w+)");
+
+// os_type-2.0.0: r"BuildVersion:\s(\w+)"
+consistent!(os_type_3, r"BuildVersion:\s(\w+)");
+
+// os_type-2.0.0: r"(\w+) Linux release"
+consistent!(os_type_4, r"(\w+) Linux release");
+
+// os_type-2.0.0: r"release\s([\w\.]+)"
+consistent!(os_type_5, r"release\s([\w\.]+)");
+
+// os_type-2.0.0: r"Distributor ID:\s(\w+)"
+consistent!(os_type_6, r"Distributor ID:\s(\w+)");
+
+// os_type-2.0.0: r"Release:\s([\w\.]+)"
+consistent!(os_type_7, r"Release:\s([\w\.]+)");
+
+// bindgen-0.37.0: r"typename type\-parameter\-\d+\-\d+::.+"
+consistent!(bindgen_0, r"typename type\-parameter\-\d+\-\d+::.+");
+
+// imap-0.8.1: "^+(.*)\r\n"
+consistent!(imap_0, "^+(.*)\r\n");
+
+// image-base64-0.1.0: r"^ffd8ffe0"
+consistent!(image_base64_0, r"^ffd8ffe0");
+
+// image-base64-0.1.0: r"^89504e47"
+consistent!(image_base64_1, r"^89504e47");
+
+// image-base64-0.1.0: r"^47494638"
+consistent!(image_base64_2, r"^47494638");
+
+// json-pointer-0.3.2: "^(/([^/~]|~[01])*)*$"
+consistent!(json_pointer_0, "^(/([^/~]|~[01])*)*$");
+
+// json-pointer-0.3.2: "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$"
+consistent!(json_pointer_1, "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$");
+
+// mysql_common-0.7.0: r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB"
+consistent!(mysql_common_0, r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB");
+
+// mysql_common-0.7.0: r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)"
+consistent!(mysql_common_1, r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)");
+
+// government_id-0.1.0: r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$"
+consistent!(government_id_0, r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$");
+
+// ohmers-0.1.1: r"UniqueIndexViolation: (\w+)"
+consistent!(ohmers_0, r"UniqueIndexViolation: (\w+)");
+
+// eliza-1.0.0: r"(.*) you are (.*)"
+consistent!(eliza_0, r"(.*) you are (.*)");
+
+// eliza-1.0.0: r"(.*) you are (.*)"
+consistent!(eliza_1, r"(.*) you are (.*)");
+
+// eliza-1.0.0: r"(.*) you are (.*)"
+consistent!(eliza_2, r"(.*) you are (.*)");
+
+// chema-0.0.5: "^\\s*\\*"
+consistent!(chema_0, "^\\s*\\*");
+
+// chema-0.0.5: "^\\s*@(\\w+)\\s+(.*)"
+consistent!(chema_1, "^\\s*@(\\w+)\\s+(.*)");
+
+// chord3-0.3.0: r"^\s*#"
+consistent!(chord3_0, r"^\s*#");
+
+// chord3-0.3.0: r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}"
+consistent!(chord3_1, r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}");
+
+// chord3-0.3.0: r"\{(eot|end_of_tab):?\s*"
+consistent!(chord3_2, r"\{(eot|end_of_tab):?\s*");
+
+// chord3-0.3.0: r"([^\[]*)(?:\[([^\]]*)\])?"
+consistent!(chord3_3, r"([^\[]*)(?:\[([^\]]*)\])?");
+
+// checkmail-0.1.1: "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+consistent!(checkmail_0, "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$");
+
+// cntk-0.2.1: r"\b\w\w+\b"
+consistent!(cntk_0, r"\b\w\w+\b");
+
+// cntk-0.2.1: r"\b\w\w+\b"
+consistent!(cntk_1, r"\b\w\w+\b");
+
+// cniguru-0.1.0: r"\(id: (\d+)\)"
+consistent!(cniguru_0, r"\(id: (\d+)\)");
+
+// upm_lib-0.3.0: r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$"
+consistent!(upm_lib_0, r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$");
+
+// avro-0.2.1: r"^\s*(\*+(\s+))?"
+consistent!(avro_0, r"^\s*(\*+(\s+))?");
+
+// avro-0.2.1: r"^\s*(\*+)?"
+consistent!(avro_1, r"^\s*(\*+)?");
+
+// nomi-0.0.2: "[0-9]+"
+consistent!(nomi_0, "[0-9]+");
+
+// nodes-0.1.0: "([0-9]+)@(?:nodes|n)?:([^@]+)?"
+consistent!(nodes_0, "([0-9]+)@(?:nodes|n)?:([^@]+)?");
+
+// not-stakkr-1.0.0: r"(?i)in (\d+) (second|minute|hour|day|week)s?"
+consistent!(not_stakkr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?");
+
+// notetxt-0.0.1: "^([A-Za-z0-9 -_:]+)\n-+\n"
+consistent!(notetxt_0, "^([A-Za-z0-9 -_:]+)\n-+\n");
+
+// nail-0.1.0-pre.0: r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$"
+consistent!(nail_0, r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$");
+
+// nail-0.1.0-pre.0: r"^-?[0-9]+$"
+consistent!(nail_1, r"^-?[0-9]+$");
+
+// askalono-0.2.0: r"[^\w\s\pP]+"
+consistent!(askalono_0, r"[^\w\s\pP]+");
+
+// askalono-0.2.0: r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+"
+consistent!(askalono_1, r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+");
+
+// askalono-0.2.0: r"\p{Pd}+"
+consistent!(askalono_2, r"\p{Pd}+");
+
+// askalono-0.2.0: r"\p{Ps}+"
+consistent!(askalono_3, r"\p{Ps}+");
+
+// askalono-0.2.0: r"\p{Pe}+"
+consistent!(askalono_4, r"\p{Pe}+");
+
+// askalono-0.2.0: r"\p{Pc}+"
+consistent!(askalono_5, r"\p{Pc}+");
+
+// askalono-0.2.0: r"[©Ⓒⓒ]"
+consistent!(askalono_6, r"[©Ⓒⓒ]");
+
+// askalono-0.2.0: r"[\r\n\v\f]"
+consistent!(askalono_7, r"[\r\n\v\f]");
+
+// askalono-0.2.0: r"\n{3,}"
+consistent!(askalono_8, r"\n{3,}");
+
+// askalono-0.2.0: r"[^\w\s]+"
+consistent!(askalono_9, r"[^\w\s]+");
+
+// askalono-0.2.0: r"\s+"
+consistent!(askalono_10, r"\s+");
+
+// assembunny_plus-0.0.3: r"[^0-9a-zA-Z_]"
+consistent!(assembunny_plus_0, r"[^0-9a-zA-Z_]");
+
+// assembunny_plus-0.0.3: r"[0-9]"
+consistent!(assembunny_plus_1, r"[0-9]");
+
+// salt-compressor-0.4.0: r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$"
+consistent!(
+    salt_compressor_0,
+    r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$"
+);
+
+// sabisabi-0.4.1: r"</?[^>]+?>"
+consistent!(sabisabi_0, r"</?[^>]+?>");
+
+// sabisabi-0.4.1: r"\([^)]*\)"
+consistent!(sabisabi_1, r"\([^)]*\)");
+
+// sassers-0.13.5-h28: "@import \"([^\"]*)\";"
+consistent!(sassers_0, "@import \"([^\"]*)\";");
+
+// shadowsocks-0.6.2: r"[A-Za-z\d-]{1,63}$"
+consistent!(shadowsocks_0, r"[A-Za-z\d-]{1,63}$");
+
+// shkeleton-0.1.5: "[abc]+"
+consistent!(shkeleton_0, "[abc]+");
+
+// shellwords-0.1.0: r"([^A-Za-z0-9_\-.,:/@\n])"
+consistent!(shellwords_0, r"([^A-Za-z0-9_\-.,:/@\n])");
+
+// shellwords-0.1.0: r"\n"
+consistent!(shellwords_1, r"\n");
+
+// shush-0.1.5: "(?P<num>[0-9]+)(?P<units>[dhms])"
+consistent!(shush_0, "(?P<num>[0-9]+)(?P<units>[dhms])");
+
+// woothee-0.8.0: r"(?:Chrome|CrMo|CriOS)/([.0-9]+)"
+consistent!(woothee_0, r"(?:Chrome|CrMo|CriOS)/([.0-9]+)");
+
+// woothee-0.8.0: r"Vivaldi/([.0-9]+)"
+consistent!(woothee_1, r"Vivaldi/([.0-9]+)");
+
+// woothee-0.8.0: r"Firefox/([.0-9]+)"
+consistent!(woothee_2, r"Firefox/([.0-9]+)");
+
+// woothee-0.8.0: r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$"
+consistent!(woothee_3, r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$");
+
+// woothee-0.8.0: r"FxiOS/([.0-9]+)"
+consistent!(woothee_4, r"FxiOS/([.0-9]+)");
+
+// woothee-0.8.0: r"\(([^;)]+);FOMA;"
+consistent!(woothee_5, r"\(([^;)]+);FOMA;");
+
+// woothee-0.8.0: r"jig browser[^;]+; ([^);]+)"
+consistent!(woothee_6, r"jig browser[^;]+; ([^);]+)");
+
+// woothee-0.8.0: r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)"
+consistent!(woothee_7, r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)");
+
+// woothee-0.8.0: r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)"
+consistent!(woothee_8, r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)");
+
+// woothee-0.8.0: r"(?i)(?:feed|web) ?parser"
+consistent!(woothee_9, r"(?i)(?:feed|web) ?parser");
+
+// woothee-0.8.0: r"(?i)watch ?dog"
+consistent!(woothee_10, r"(?i)watch ?dog");
+
+// woothee-0.8.0: r"Edge/([.0-9]+)"
+consistent!(woothee_11, r"Edge/([.0-9]+)");
+
+// woothee-0.8.0: r"MSIE ([.0-9]+);"
+consistent!(woothee_12, r"MSIE ([.0-9]+);");
+
+// woothee-0.8.0: r"Version/([.0-9]+)"
+consistent!(woothee_13, r"Version/([.0-9]+)");
+
+// woothee-0.8.0: r"Opera[/ ]([.0-9]+)"
+consistent!(woothee_14, r"Opera[/ ]([.0-9]+)");
+
+// woothee-0.8.0: r"OPR/([.0-9]+)"
+consistent!(woothee_15, r"OPR/([.0-9]+)");
+
+// woothee-0.8.0: r"Version/([.0-9]+)"
+consistent!(woothee_16, r"Version/([.0-9]+)");
+
+// woothee-0.8.0: r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)"
+consistent!(woothee_17, r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)");
+
+// woothee-0.8.0: r"Trident/([.0-9]+);"
+consistent!(woothee_18, r"Trident/([.0-9]+);");
+
+// woothee-0.8.0: r" rv:([.0-9]+)"
+consistent!(woothee_19, r" rv:([.0-9]+)");
+
+// woothee-0.8.0: r"IEMobile/([.0-9]+);"
+consistent!(woothee_20, r"IEMobile/([.0-9]+);");
+
+// woothee-0.8.0: r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)"
+consistent!(woothee_21, r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)");
+
+// woothee-0.8.0: r"Windows ([ .a-zA-Z0-9]+)[;\\)]"
+consistent!(woothee_22, r"Windows ([ .a-zA-Z0-9]+)[;\\)]");
+
+// woothee-0.8.0: r"^Phone(?: OS)? ([.0-9]+)"
+consistent!(woothee_23, r"^Phone(?: OS)? ([.0-9]+)");
+
+// woothee-0.8.0: r"iP(hone;|ad;|od) .*like Mac OS X"
+consistent!(woothee_24, r"iP(hone;|ad;|od) .*like Mac OS X");
+
+// woothee-0.8.0: r"Version/([.0-9]+)"
+consistent!(woothee_25, r"Version/([.0-9]+)");
+
+// woothee-0.8.0: r"rv:(\d+\.\d+\.\d+)"
+consistent!(woothee_26, r"rv:(\d+\.\d+\.\d+)");
+
+// woothee-0.8.0: r"FreeBSD ([^;\)]+);"
+consistent!(woothee_27, r"FreeBSD ([^;\)]+);");
+
+// woothee-0.8.0: r"CrOS ([^\)]+)\)"
+consistent!(woothee_28, r"CrOS ([^\)]+)\)");
+
+// woothee-0.8.0: r"Android[- ](\d+\.\d+(?:\.\d+)?)"
+consistent!(woothee_29, r"Android[- ](\d+\.\d+(?:\.\d+)?)");
+
+// woothee-0.8.0: r"PSP \(PlayStation Portable\); ([.0-9]+)\)"
+consistent!(woothee_30, r"PSP \(PlayStation Portable\); ([.0-9]+)\)");
+
+// woothee-0.8.0: r"PLAYSTATION 3;? ([.0-9]+)\)"
+consistent!(woothee_31, r"PLAYSTATION 3;? ([.0-9]+)\)");
+
+// woothee-0.8.0: r"PlayStation Vita ([.0-9]+)\)"
+consistent!(woothee_32, r"PlayStation Vita ([.0-9]+)\)");
+
+// woothee-0.8.0: r"PlayStation 4 ([.0-9]+)\)"
+consistent!(woothee_33, r"PlayStation 4 ([.0-9]+)\)");
+
+// woothee-0.8.0: r"BB10(?:.+)Version/([.0-9]+) "
+consistent!(woothee_34, r"BB10(?:.+)Version/([.0-9]+) ");
+
+// woothee-0.8.0: r"BlackBerry(?:\d+)/([.0-9]+) "
+consistent!(woothee_35, r"BlackBerry(?:\d+)/([.0-9]+) ");
+
+// woothee-0.8.0: r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X"
+consistent!(
+    woothee_36,
+    r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X"
+);
+
+// woothee-0.8.0: r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"
+consistent!(woothee_37, r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)");
+
+// woothee-0.8.0: r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)"
+consistent!(
+    woothee_38,
+    r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)"
+);
+
+// woothee-0.8.0: r"[- ]HttpClient(/|$)"
+consistent!(woothee_39, r"[- ]HttpClient(/|$)");
+
+// woothee-0.8.0: r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)"
+consistent!(
+    woothee_40,
+    r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)"
+);
+
+// woothee-0.8.0: r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)"
+consistent!(woothee_41, r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)");
+
+// woothee-0.8.0: r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)"
+consistent!(
+    woothee_42,
+    r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)"
+);
+
+// woothee-0.8.0: r"Sleipnir/([.0-9]+)"
+consistent!(woothee_43, r"Sleipnir/([.0-9]+)");
+
+// word_replace-0.0.3: r"@@[a-z|A-Z|\d]+@@"
+consistent!(word_replace_0, r"@@[a-z|A-Z|\d]+@@");
+
+// wordcount-0.1.0: r"\w+"
+consistent!(wordcount_0, r"\w+");
+
+// just-0.3.12: "^([^=]+)=(.*)$"
+consistent!(just_0, "^([^=]+)=(.*)$");
+
+// emote-0.1.0: r":[a-zA-Z_]+?:"
+consistent!(emote_0, r":[a-zA-Z_]+?:");
+
+// emojicons-1.0.1: r":([a-zA-Z0-9_+-]+):"
+consistent!(emojicons_0, r":([a-zA-Z0-9_+-]+):");
+
+// git2_codecommit-0.1.2: r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com"
+consistent!(
+    git2_codecommit_0,
+    r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com"
+);
+
+// git-workarea-3.1.2: r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$"
+consistent!(
+    git_workarea_0,
+    r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$"
+);
+
+// git-shell-enforce-directory-1.0.0: r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$"
+consistent!(
+    git_shell_enforce_directory_0,
+    r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$"
+);
+
+// git-journal-1.6.3: r"[ \n]:(.*?):"
+consistent!(git_journal_0, r"[ \n]:(.*?):");
+
+// git-find-0.3.2: r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$"
+consistent!(
+    git_find_0,
+    r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$"
+);
+
+// gitlab-api-0.6.0: r"private_token=\w{20}"
+consistent!(gitlab_api_0, r"private_token=\w{20}");
+
+// td-client-0.7.0: "^(http://|https://)"
+consistent!(td_client_0, "^(http://|https://)");
+
+// karaconv-0.3.0: r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)"
+consistent!(karaconv_0, r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)");
+
+// katana-1.0.2: r"(?P<comp>et al\.)(?:\.)"
+consistent!(katana_0, r"(?P<comp>et al\.)(?:\.)");
+
+// katana-1.0.2: r"\.{3}"
+consistent!(katana_1, r"\.{3}");
+
+// katana-1.0.2: r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)"
+consistent!(katana_2, r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)");
+
+// katana-1.0.2: r"\s\.(?P<nums>[0-9]+)"
+consistent!(katana_3, r"\s\.(?P<nums>[0-9]+)");
+
+// katana-1.0.2: r"(?:[A-Za-z]\.){2,}"
+consistent!(katana_4, r"(?:[A-Za-z]\.){2,}");
+
+// katana-1.0.2: r"(?P<init>[A-Z])(?P<point>\.)"
+consistent!(katana_5, r"(?P<init>[A-Z])(?P<point>\.)");
+
+// katana-1.0.2: r"(?P<title>[A-Z][a-z]{1,3})(\.)"
+consistent!(katana_6, r"(?P<title>[A-Z][a-z]{1,3})(\.)");
+
+// katana-1.0.2: r"&==&(?P<p>[.!?])"
+consistent!(katana_7, r"&==&(?P<p>[.!?])");
+
+// katana-1.0.2: r"&\^&(?P<p>[.!?])"
+consistent!(katana_8, r"&\^&(?P<p>[.!?])");
+
+// katana-1.0.2: r"&\*\*&(?P<p>[.!?])"
+consistent!(katana_9, r"&\*\*&(?P<p>[.!?])");
+
+// katana-1.0.2: r"&=&(?P<p>[.!?])"
+consistent!(katana_10, r"&=&(?P<p>[.!?])");
+
+// katana-1.0.2: r"&##&(?P<p>[.!?])"
+consistent!(katana_11, r"&##&(?P<p>[.!?])");
+
+// katana-1.0.2: r"&\$&(?P<p>[.!?])"
+consistent!(katana_12, r"&\$&(?P<p>[.!?])");
+
+// kailua_syntax-1.1.0: r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)"
+consistent!(kailua_syntax_0, r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)");
+
+// kailua_syntax-1.1.0: r"<(\d+)>"
+consistent!(kailua_syntax_1, r"<(\d+)>");
+
+// ftp-3.0.1: r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)"
+consistent!(ftp_0, r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)");
+
+// ftp-3.0.1: r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b"
+consistent!(ftp_1, r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b");
+
+// ftp-3.0.1: r"\s+(\d+)\s*$"
+consistent!(ftp_2, r"\s+(\d+)\s*$");
+
+// vat-0.1.0: r"<countryCode>(.*?)</countryCode>"
+consistent!(vat_0, r"<countryCode>(.*?)</countryCode>");
+
+// vat-0.1.0: r"<vatNumber>(.*?)</vatNumber>"
+consistent!(vat_1, r"<vatNumber>(.*?)</vatNumber>");
+
+// vat-0.1.0: r"<name>(.*?)</name>"
+consistent!(vat_2, r"<name>(.*?)</name>");
+
+// vat-0.1.0: r"<address>(?s)(.*?)(?-s)</address>"
+consistent!(vat_3, r"<address>(?s)(.*?)(?-s)</address>");
+
+// vat-0.1.0: r"<valid>(true|false)</valid>"
+consistent!(vat_4, r"<valid>(true|false)</valid>");
+
+// vat-0.1.0: r"^ATU\d{8}$"
+consistent!(vat_5, r"^ATU\d{8}$");
+
+// vat-0.1.0: r"^BE0?\d{9, 10}$"
+consistent!(vat_6, r"^BE0?\d{9, 10}$");
+
+// vat-0.1.0: r"^BG\d{9,10}$"
+consistent!(vat_7, r"^BG\d{9,10}$");
+
+// vat-0.1.0: r"^HR\d{11}$"
+consistent!(vat_8, r"^HR\d{11}$");
+
+// vat-0.1.0: r"^CY\d{8}[A-Z]$"
+consistent!(vat_9, r"^CY\d{8}[A-Z]$");
+
+// vat-0.1.0: r"^CZ\d{8,10}$"
+consistent!(vat_10, r"^CZ\d{8,10}$");
+
+// vat-0.1.0: r"^DK\d{8}$"
+consistent!(vat_11, r"^DK\d{8}$");
+
+// vat-0.1.0: r"^EE\d{9}$"
+consistent!(vat_12, r"^EE\d{9}$");
+
+// vat-0.1.0: r"^FI\d{8}$"
+consistent!(vat_13, r"^FI\d{8}$");
+
+// vat-0.1.0: r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$"
+consistent!(vat_14, r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$");
+
+// vat-0.1.0: r"^DE\d{9}$"
+consistent!(vat_15, r"^DE\d{9}$");
+
+// vat-0.1.0: r"^EL\d{9}$"
+consistent!(vat_16, r"^EL\d{9}$");
+
+// vat-0.1.0: r"^HU\d{8}$"
+consistent!(vat_17, r"^HU\d{8}$");
+
+// vat-0.1.0: r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$"
+consistent!(vat_18, r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$");
+
+// vat-0.1.0: r"^IT\d{11}$"
+consistent!(vat_19, r"^IT\d{11}$");
+
+// vat-0.1.0: r"^LV\d{11}$"
+consistent!(vat_20, r"^LV\d{11}$");
+
+// vat-0.1.0: r"^LT(\d{9}|\d{12})$"
+consistent!(vat_21, r"^LT(\d{9}|\d{12})$");
+
+// vat-0.1.0: r"^LU\d{8}$"
+consistent!(vat_22, r"^LU\d{8}$");
+
+// vat-0.1.0: r"^MT\d{8}$"
+consistent!(vat_23, r"^MT\d{8}$");
+
+// vat-0.1.0: r"^NL\d{9}B\d{2}$"
+consistent!(vat_24, r"^NL\d{9}B\d{2}$");
+
+// vat-0.1.0: r"^PL\d{10}$"
+consistent!(vat_25, r"^PL\d{10}$");
+
+// vat-0.1.0: r"^PT\d{9}$"
+consistent!(vat_26, r"^PT\d{9}$");
+
+// vat-0.1.0: r"^RO\d{2,10}$"
+consistent!(vat_27, r"^RO\d{2,10}$");
+
+// vat-0.1.0: r"^SK\d{10}$"
+consistent!(vat_28, r"^SK\d{10}$");
+
+// vat-0.1.0: r"^SI\d{8}$"
+consistent!(vat_29, r"^SI\d{8}$");
+
+// vat-0.1.0: r"^ES[A-Z0-9]\d{7}[A-Z0-9]$"
+consistent!(vat_30, r"^ES[A-Z0-9]\d{7}[A-Z0-9]$");
+
+// vat-0.1.0: r"^SE\d{10}01$"
+consistent!(vat_31, r"^SE\d{10}01$");
+
+// vat-0.1.0: r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$"
+consistent!(vat_32, r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$");
+
+// eve-0.1.1: r"\{\{(.*)\}\}"
+consistent!(eve_0, r"\{\{(.*)\}\}");
+
+// egc-0.1.2: "^mio"
+consistent!(egc_0, "^mio");
+
+// pew-0.2.3: ""
+consistent!(pew_0, "");
+
+// pew-0.2.3: ""
+consistent!(pew_1, "");
+
+// mob-0.4.3: "y"
+consistent!(mob_0, "y");
+
+// lit-0.2.8: "@([a-z]+)"
+consistent!(lit_0, "@([a-z]+)");
+
+// lit-0.2.8: "([A-Z-]+):(.*)"
+consistent!(lit_1, "([A-Z-]+):(.*)");
+
+// lit-0.2.8: "^[a-zA-Z_][a-zA-Z0-9_]*$"
+consistent!(lit_2, "^[a-zA-Z_][a-zA-Z0-9_]*$");
+
+// avm-1.0.1: r"\d+\.\d+\.\d+"
+consistent!(avm_0, r"\d+\.\d+\.\d+");
+
+// avm-1.0.1: r"\d+\.\d+\.\d+"
+consistent!(avm_1, r"\d+\.\d+\.\d+");
+
+// orm-0.2.0: r"^Vec<(.+)>$"
+consistent!(orm_0, r"^Vec<(.+)>$");
+
+// sgf-0.1.5: r"\\(\r\n|\n\r|\n|\r)"
+consistent!(sgf_0, r"\\(\r\n|\n\r|\n|\r)");
+
+// sgf-0.1.5: r"\\(.)"
+consistent!(sgf_1, r"\\(.)");
+
+// sgf-0.1.5: r"\r\n|\n\r|\n|\r"
+consistent!(sgf_2, r"\r\n|\n\r|\n|\r");
+
+// sgf-0.1.5: r"([\]\\:])"
+consistent!(sgf_3, r"([\]\\:])");
+
+// dok-0.2.0: "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$"
+consistent!(
+    dok_0,
+    "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$"
+);
+
+// d20-0.1.0: r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)"
+consistent!(d20_0, r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)");
+
+// dvb-0.3.0: "E"
+consistent!(dvb_0, "E");
+
+// dvb-0.3.0: "^F"
+consistent!(dvb_1, "^F");
+
+// dvb-0.3.0: "^S"
+consistent!(dvb_2, "^S");
+
+// ger-0.2.0: r"Change-Id: (I[a-f0-9]{40})$"
+consistent!(ger_0, r"Change-Id: (I[a-f0-9]{40})$");
+
+// ger-0.2.0: r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$"
+consistent!(
+    ger_1,
+    r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$"
+);
+
+// n5-0.2.1: r"(\d+)(\.(\d+))?(\.(\d+))?(.*)"
+consistent!(n5_0, r"(\d+)(\.(\d+))?(\.(\d+))?(.*)");
+
+// po-0.1.4: r"[A-Za-z0-9]"
+consistent!(po_0, r"[A-Za-z0-9]");
+
+// carnix-0.8.5: "path is (‘|')?([^’'\n]*)(’|')?"
+consistent!(carnix_0, "path is (‘|')?([^’'\n]*)(’|')?");
+
+// carnix-0.8.5: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?"
+consistent!(carnix_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?");
+
+// carnix-0.8.5: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"
+consistent!(carnix_2, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?");
+
+// carnix-0.8.5: r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?"
+consistent!(carnix_3, r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?");
+
+// caseless-0.2.1: r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$"
+consistent!(caseless_0, r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$");
+
+// caseless-0.2.1: r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);"
+consistent!(caseless_1, r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);");
+
+// cabot-0.2.0: "\r?\n\r?\n"
+consistent!(cabot_0, "\r?\n\r?\n");
+
+// cabot-0.2.0: "\r?\n"
+consistent!(cabot_1, "\r?\n");
+
+// card-validate-2.2.1: r"^600"
+consistent!(card_validate_0, r"^600");
+
+// card-validate-2.2.1: r"^5019"
+consistent!(card_validate_1, r"^5019");
+
+// card-validate-2.2.1: r"^4"
+consistent!(card_validate_2, r"^4");
+
+// card-validate-2.2.1: r"^(5[1-5]|2[2-7])"
+consistent!(card_validate_3, r"^(5[1-5]|2[2-7])");
+
+// card-validate-2.2.1: r"^3[47]"
+consistent!(card_validate_4, r"^3[47]");
+
+// card-validate-2.2.1: r"^3[0689]"
+consistent!(card_validate_5, r"^3[0689]");
+
+// card-validate-2.2.1: r"^6([045]|22)"
+consistent!(card_validate_6, r"^6([045]|22)");
+
+// card-validate-2.2.1: r"^(62|88)"
+consistent!(card_validate_7, r"^(62|88)");
+
+// card-validate-2.2.1: r"^35"
+consistent!(card_validate_8, r"^35");
+
+// card-validate-2.2.1: r"^[0-9]+$"
+consistent!(card_validate_9, r"^[0-9]+$");
+
+// cargo-testify-0.3.0: r"\d{1,} passed.*filtered out"
+consistent!(cargo_testify_0, r"\d{1,} passed.*filtered out");
+
+// cargo-testify-0.3.0: r"error(:|\[).*"
+consistent!(cargo_testify_1, r"error(:|\[).*");
+
+// cargo-wix-0.0.5: r"<(.*?)>"
+consistent!(cargo_wix_0, r"<(.*?)>");
+
+// cargo-wix-0.0.5: r"<(.*?)>"
+consistent!(cargo_wix_1, r"<(.*?)>");
+
+// cargo-wix-0.0.5: r"<(.*?)>"
+consistent!(cargo_wix_2, r"<(.*?)>");
+
+// cargo-wix-0.0.5: r"<(.*?)>"
+consistent!(cargo_wix_3, r"<(.*?)>");
+
+// cargo-incremental-0.1.23: r"(?m)^incremental: re-using (\d+) out of (\d+) modules$"
+consistent!(
+    cargo_incremental_0,
+    r"(?m)^incremental: re-using (\d+) out of (\d+) modules$"
+);
+
+// cargo-incremental-0.1.23: "(?m)(warning|error): (.*)\n  --> ([^:]:\\d+:\\d+)$"
+consistent!(
+    cargo_incremental_1,
+    "(?m)(warning|error): (.*)\n  --> ([^:]:\\d+:\\d+)$"
+);
+
+// cargo-incremental-0.1.23: r"(?m)^test (.*) \.\.\. (\w+)"
+consistent!(cargo_incremental_2, r"(?m)^test (.*) \.\.\. (\w+)");
+
+// cargo-incremental-0.1.23: r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured"
+consistent!(
+    cargo_incremental_3,
+    r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured"
+);
+
+// cargo-testjs-0.1.2: r"^[^-]+-[0-9a-f]+\.js$"
+consistent!(cargo_testjs_0, r"^[^-]+-[0-9a-f]+\.js$");
+
+// cargo-tarpaulin-0.6.2: r"\s*//"
+consistent!(cargo_tarpaulin_0, r"\s*//");
+
+// cargo-tarpaulin-0.6.2: r"/\*"
+consistent!(cargo_tarpaulin_1, r"/\*");
+
+// cargo-tarpaulin-0.6.2: r"\*/"
+consistent!(cargo_tarpaulin_2, r"\*/");
+
+// cargo-culture-kit-0.1.0: r"^fo"
+consistent!(cargo_culture_kit_0, r"^fo");
+
+// cargo-screeps-0.1.3: "\\s+"
+consistent!(cargo_screeps_0, "\\s+");
+
+// cargo-brew-0.1.4: r"`(\S+) v([0-9.]+)"
+consistent!(cargo_brew_0, r"`(\S+) v([0-9.]+)");
+
+// cargo-release-0.10.2: "^\\[.+\\]"
+consistent!(cargo_release_0, "^\\[.+\\]");
+
+// cargo-release-0.10.2: "^\\[\\[.+\\]\\]"
+consistent!(cargo_release_1, "^\\[\\[.+\\]\\]");
+
+// cargo-edit-0.3.0-beta.1: r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$"
+consistent!(
+    cargo_edit_0,
+    r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$"
+);
+
+// cargo-edit-0.3.0-beta.1: r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$"
+consistent!(
+    cargo_edit_1,
+    r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$"
+);
+
+// cargo-disassemble-0.1.1: ".*"
+consistent!(cargo_disassemble_0, ".*");
+
+// cargo-demangle-0.1.2: r"(?m)(?P<symbol>_ZN[0-9]+.*E)"
+consistent!(cargo_demangle_0, r"(?m)(?P<symbol>_ZN[0-9]+.*E)");
+
+// cargo-coverage-annotations-0.1.5: r"^\s*\}(?:\)*;?|\s*else\s*\{)$"
+consistent!(cargo_coverage_annotations_0, r"^\s*\}(?:\)*;?|\s*else\s*\{)$");
+
+// cargo-urlcrate-1.0.1: "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]"
+consistent!(cargo_urlcrate_0, "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]");
+
+// cargo-script-0.2.8: r"^\s*\*( |$)"
+consistent!(cargo_script_0, r"^\s*\*( |$)");
+
+// cargo-script-0.2.8: r"^(\s+)"
+consistent!(cargo_script_1, r"^(\s+)");
+
+// cargo-script-0.2.8: r"/\*|\*/"
+consistent!(cargo_script_2, r"/\*|\*/");
+
+// cargo-script-0.2.8: r"^\s*//!"
+consistent!(cargo_script_3, r"^\s*//!");
+
+// cargo-script-0.2.8: r"^#![^\[].*?(\r\n|\n)"
+consistent!(cargo_script_4, r"^#![^\[].*?(\r\n|\n)");
+
+// cargo-update-1.5.2: r"cargo-install-update\.exe-v.+"
+consistent!(cargo_update_0, r"cargo-install-update\.exe-v.+");
+
+// canteen-0.4.1: r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$"
+consistent!(
+    canteen_0,
+    r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$"
+);
+
+// thruster-cli-0.1.3: r"(.)([A-Z])"
+consistent!(thruster_cli_0, r"(.)([A-Z])");
+
+// thieves-cant-0.1.0: "([Z]+)$"
+consistent!(thieves_cant_0, "([Z]+)$");
+
+// codeowners-0.1.3: r"^@\S+/\S+"
+consistent!(codeowners_0, r"^@\S+/\S+");
+
+// codeowners-0.1.3: r"^@\S+"
+consistent!(codeowners_1, r"^@\S+");
+
+// codeowners-0.1.3: r"^\S+@\S+"
+consistent!(codeowners_2, r"^\S+@\S+");
+
+// conserve-0.4.2: r"^b0000 {21} complete   20[-0-9T:+]+\s +\d+s\n$"
+consistent!(conserve_0, r"^b0000 {21} complete   20[-0-9T:+]+\s +\d+s\n$");
+
+// commodore-0.3.0: r"(?P<greeting>\S+?) (?P<name>\S+?)$"
+consistent!(commodore_0, r"(?P<greeting>\S+?) (?P<name>\S+?)$");
+
+// corollary-0.3.0: r"([ \t]*)```haskell([\s\S]*?)```"
+consistent!(corollary_0, r"([ \t]*)```haskell([\s\S]*?)```");
+
+// corollary-0.3.0: r"\b((?:a|b|t)\d*)\b"
+consistent!(corollary_1, r"\b((?:a|b|t)\d*)\b");
+
+// colorizex-0.1.3: "NB"
+consistent!(colorizex_0, "NB");
+
+// colorstring-0.0.1: r"(?i)\[[a-z0-9_-]+\]"
+consistent!(colorstring_0, r"(?i)\[[a-z0-9_-]+\]");
+
+// colorstring-0.0.1: r"^(?i)(\[[a-z0-9_-]+\])+"
+consistent!(colorstring_1, r"^(?i)(\[[a-z0-9_-]+\])+");
+
+// cosmogony-0.3.0: "name:(.+)"
+consistent!(cosmogony_0, "name:(.+)");
+
+// cobalt-bin-0.12.1: r"(?m:^ {0,3}\[[^\]]+\]:.+$)"
+consistent!(cobalt_bin_0, r"(?m:^ {0,3}\[[^\]]+\]:.+$)");
+
+// comrak-0.2.12: r"[^\p{L}\p{M}\p{N}\p{Pc} -]"
+consistent!(comrak_0, r"[^\p{L}\p{M}\p{N}\p{Pc} -]");
+
+// content-blocker-0.2.3: ""
+consistent!(content_blocker_0, "");
+
+// content-blocker-0.2.3: "(?i)hi"
+consistent!(content_blocker_1, "(?i)hi");
+
+// content-blocker-0.2.3: "http[s]?://domain.org"
+consistent!(content_blocker_2, "http[s]?://domain.org");
+
+// content-blocker-0.2.3: "(?i)http[s]?://domain.org"
+consistent!(content_blocker_3, "(?i)http[s]?://domain.org");
+
+// content-blocker-0.2.3: "http://domain.org"
+consistent!(content_blocker_4, "http://domain.org");
+
+// content-blocker-0.2.3: "http://domain.org"
+consistent!(content_blocker_5, "http://domain.org");
+
+// content-blocker-0.2.3: "ad.html"
+consistent!(content_blocker_6, "ad.html");
+
+// content-blocker-0.2.3: "ad.html"
+consistent!(content_blocker_7, "ad.html");
+
+// content-blocker-0.2.3: "http://domain.org"
+consistent!(content_blocker_8, "http://domain.org");
+
+// content-blocker-0.2.3: "http://domain.org/nocookies.sjs"
+consistent!(content_blocker_9, "http://domain.org/nocookies.sjs");
+
+// content-blocker-0.2.3: "http://domain.org/nocookies.sjs"
+consistent!(content_blocker_10, "http://domain.org/nocookies.sjs");
+
+// content-blocker-0.2.3: "http://domain.org/hideme.jpg"
+consistent!(content_blocker_11, "http://domain.org/hideme.jpg");
+
+// content-blocker-0.2.3: "http://domain.org/ok.html"
+consistent!(content_blocker_12, "http://domain.org/ok.html");
+
+// content-blocker-0.2.3: "http://domain.org/ok.html\\?except_this=1"
+consistent!(content_blocker_13, "http://domain.org/ok.html\\?except_this=1");
+
+// victoria-dom-0.1.2: "[A-Za-z0-9=]"
+consistent!(victoria_dom_0, "[A-Za-z0-9=]");
+
+// numbat-1.0.0: r"^nsq://"
+consistent!(numbat_0, r"^nsq://");
+
+// airkorea-0.1.2: r"[\s\t\r\n]"
+consistent!(airkorea_0, r"[\s\t\r\n]");
+
+// airkorea-0.1.2: r"([\{\[,])|([\}\]])"
+consistent!(airkorea_1, r"([\{\[,])|([\}\]])");
+
+// airkorea-0.1.2: r"[^.\d]+$"
+consistent!(airkorea_2, r"[^.\d]+$");
+
+// rofl-0.0.1: r"\b"
+// consistent!(rofl_0, r"\b");
+
+// rogcat-0.2.15: r"--------- beginning of.*"
+consistent!(rogcat_0, r"--------- beginning of.*");
+
+// rogcat-0.2.15: r"a|e|i|o|u"
+consistent!(rogcat_1, r"a|e|i|o|u");
+
+// rogcat-0.2.15: r"^(\d+)([kMG])$"
+consistent!(rogcat_2, r"^(\d+)([kMG])$");
+
+// media_filename-0.1.4: "\\.([A-Za-z0-9]{2,4})$"
+consistent!(media_filename_0, "\\.([A-Za-z0-9]{2,4})$");
+
+// media_filename-0.1.4: "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})"
+consistent!(media_filename_1, "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})");
+
+// media_filename-0.1.4: "(?:^\\[([^]]+)\\]|- ?([^-]+)$)"
+consistent!(media_filename_2, "(?:^\\[([^]]+)\\]|- ?([^-]+)$)");
+
+// media_filename-0.1.4: "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])"
+consistent!(
+    media_filename_3,
+    "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])"
+);
+
+// media_filename-0.1.4: "[sS]([0-9]{1,2})"
+consistent!(media_filename_4, "[sS]([0-9]{1,2})");
+
+// media_filename-0.1.4: "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)"
+consistent!(media_filename_5, "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)");
+
+// media_filename-0.1.4: "((19[0-9]|20[01])[0-9])"
+consistent!(media_filename_6, "((19[0-9]|20[01])[0-9])");
+
+// media_filename-0.1.4: "((?i)xvid|x264|h\\.?264)"
+consistent!(media_filename_7, "((?i)xvid|x264|h\\.?264)");
+
+// media_filename-0.1.4: "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)"
+consistent!(media_filename_8, "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)");
+
+// media_filename-0.1.4: "\\[([0-9A-F]{8})\\]"
+consistent!(media_filename_9, "\\[([0-9A-F]{8})\\]");
+
+// termimage-0.3.2: r"(\d+)[xX](\d+)"
+consistent!(termimage_0, r"(\d+)[xX](\d+)");
+
+// teensy-0.1.0: r".*(\d{4}-\d{2}-\d{2}).*"
+consistent!(teensy_0, r".*(\d{4}-\d{2}-\d{2}).*");
+
+// telescreen-0.1.3: r"<@(.+)>"
+consistent!(telescreen_0, r"<@(.+)>");
+
+// tempus_fugit-0.4.4: r"^(\d+)"
+consistent!(tempus_fugit_0, r"^(\d+)");
+
+// fselect-0.4.1: "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"
+consistent!(fselect_0, "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)");
+
+// fselect-0.4.1: "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"
+consistent!(fselect_1, "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)");
+
+// fs_eventbridge-0.1.0: r"^([A-Z]+)(?:\s(.+))?\s*"
+consistent!(fs_eventbridge_0, r"^([A-Z]+)(?:\s(.+))?\s*");
+
+// joseki-0.0.1: r"(\w{1,2})\[(.+?)\]"
+consistent!(joseki_0, r"(\w{1,2})\[(.+?)\]");
+
+// tweetr-0.2.1: r"(?i)in (\d+) (second|minute|hour|day|week)s?"
+consistent!(tweetr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?");
+
+// bullet_core-0.1.1: "^(?u:[0-9])+"
+consistent!(bullet_core_0, "^(?u:[0-9])+");
+
+// bullet_core-0.1.1: "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+"
+consistent!(bullet_core_1, "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+");
+
+// bullet_core-0.1.1: "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+"
+consistent!(bullet_core_2, "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+");
+
+// bullet_core-0.1.1: "^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)"
+consistent!(bullet_core_3, "^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)");
+
+// bullet_core-0.1.1: "^(?u:\\()"
+consistent!(bullet_core_4, "^(?u:\\()");
+
+// bullet_core-0.1.1: "^(?u:\\))"
+consistent!(bullet_core_5, "^(?u:\\))");
+
+// bullet_core-0.1.1: "^(?u:\\*)"
+consistent!(bullet_core_6, "^(?u:\\*)");
+
+// bullet_core-0.1.1: "^(?u:\\+)"
+consistent!(bullet_core_7, "^(?u:\\+)");
+
+// bullet_core-0.1.1: "^(?u:,)"
+consistent!(bullet_core_8, "^(?u:,)");
+
+// bullet_core-0.1.1: "^(?u:\\-)"
+consistent!(bullet_core_9, "^(?u:\\-)");
+
+// bullet_core-0.1.1: "^(?u:/)"
+consistent!(bullet_core_10, "^(?u:/)");
+
+// bullet_core-0.1.1: "^(?u:\\[)"
+consistent!(bullet_core_11, "^(?u:\\[)");
+
+// bullet_core-0.1.1: "^(?u:\\])"
+consistent!(bullet_core_12, "^(?u:\\])");
+
+// bullet_core-0.1.1: "^(?u:\\^)"
+consistent!(bullet_core_13, "^(?u:\\^)");
+
+// bullet_core-0.1.1: "^(?u:·)"
+consistent!(bullet_core_14, "^(?u:·)");
+
+// actix-web-0.6.13: "//+"
+consistent!(actix_web_0, "//+");
+
+// actix-web-0.6.13: "//+"
+consistent!(actix_web_1, "//+");
+
+// althea_kernel_interface-0.1.0: r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)"
+consistent!(
+    althea_kernel_interface_0,
+    r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)"
+);
+
+// althea_kernel_interface-0.1.0: r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)"
+consistent!(
+    althea_kernel_interface_1,
+    r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)"
+);
+
+// alcibiades-0.3.0: r"\buci(?:\s|$)"
+consistent!(alcibiades_0, r"\buci(?:\s|$)");
+
+// ruma-identifiers-0.11.0: r"\A[a-z0-9._=-]+\z"
+consistent!(ruma_identifiers_0, r"\A[a-z0-9._=-]+\z");
+
+// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$"
+consistent!(rusqbin_0, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$");
+
+// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$"
+consistent!(rusqbin_1, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$");
+
+// rust-install-0.0.4: r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$"
+consistent!(
+    rust_install_0,
+    r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$"
+);
+
+// rust_inbox-0.0.5: "^+(.*)\r\n"
+consistent!(rust_inbox_0, "^+(.*)\r\n");
+
+// rust_inbox-0.0.5: r"^\* CAPABILITY (.*)\r\n"
+consistent!(rust_inbox_1, r"^\* CAPABILITY (.*)\r\n");
+
+// rust_inbox-0.0.5: r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)"
+consistent!(rust_inbox_2, r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)");
+
+// rust_inbox-0.0.5: r"^\* (\d+) EXISTS\r\n"
+consistent!(rust_inbox_3, r"^\* (\d+) EXISTS\r\n");
+
+// rust_inbox-0.0.5: r"^\* (\d+) RECENT\r\n"
+consistent!(rust_inbox_4, r"^\* (\d+) RECENT\r\n");
+
+// rust_inbox-0.0.5: r"^\* FLAGS (.+)\r\n"
+consistent!(rust_inbox_5, r"^\* FLAGS (.+)\r\n");
+
+// rust_inbox-0.0.5: r"^\* OK \[UNSEEN (\d+)\](.*)\r\n"
+consistent!(rust_inbox_6, r"^\* OK \[UNSEEN (\d+)\](.*)\r\n");
+
+// rust_inbox-0.0.5: r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n"
+consistent!(rust_inbox_7, r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n");
+
+// rust_inbox-0.0.5: r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n"
+consistent!(rust_inbox_8, r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n");
+
+// rust_inbox-0.0.5: r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n"
+consistent!(rust_inbox_9, r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n");
+
+// rustml-0.0.7: r"^[a-z]+ (\d+)$"
+consistent!(rustml_0, r"^[a-z]+ (\d+)$");
+
+// rustml-0.0.7: r"^[a-z]+ (\d+)$"
+consistent!(rustml_1, r"^[a-z]+ (\d+)$");
+
+// rustml-0.0.7: r"^[a-z]+ (\d+)$"
+consistent!(rustml_2, r"^[a-z]+ (\d+)$");
+
+// rustfmt-0.10.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"
+consistent!(rustfmt_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*");
+
+// rustfmt-core-0.4.0: r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)"
+consistent!(rustfmt_core_0, r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)");
+
+// rustfmt-core-0.4.0: r"^## `([^`]+)`"
+consistent!(rustfmt_core_1, r"^## `([^`]+)`");
+
+// rustfmt-core-0.4.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"
+consistent!(rustfmt_core_2, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*");
+
+// rustfmt-core-0.4.0: r"\s;"
+consistent!(rustfmt_core_3, r"\s;");
+
+// rust-enum-derive-0.4.0: r"^(0x)?([:digit:]+)$"
+consistent!(rust_enum_derive_0, r"^(0x)?([:digit:]+)$");
+
+// rust-enum-derive-0.4.0: r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$"
+consistent!(
+    rust_enum_derive_1,
+    r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$"
+);
+
+// rust-enum-derive-0.4.0: r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*,"
+consistent!(rust_enum_derive_2, r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*,");
+
+// rust-enum-derive-0.4.0: r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)"
+consistent!(
+    rust_enum_derive_3,
+    r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)"
+);
+
+// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$"
+consistent!(rustsourcebundler_0, r"^\s*pub mod (.+);$");
+
+// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$"
+consistent!(rustsourcebundler_1, r"^\s*pub mod (.+);$");
+
+// rustfmt-nightly-0.8.2: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"
+consistent!(rustfmt_nightly_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*");
+
+// rustfmt-nightly-0.8.2: r"\s;"
+consistent!(rustfmt_nightly_1, r"\s;");
+
+// rustache-0.1.0: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"
+consistent!(rustache_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)");
+
+// rustfilt-0.2.0: r"_ZN[\$\._[:alnum:]]*"
+consistent!(rustfilt_0, r"_ZN[\$\._[:alnum:]]*");
+
+// rustache-lists-0.1.2: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"
+consistent!(rustache_lists_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)");
+
+// rural-0.7.3: "(.+)=(.+)"
+consistent!(rural_0, "(.+)=(.+)");
+
+// rural-0.7.3: "(.*):(.+)"
+consistent!(rural_1, "(.*):(.+)");
+
+// rural-0.7.3: "(.+):=(.+)"
+consistent!(rural_2, "(.+):=(.+)");
+
+// rural-0.7.3: "(.*)==(.+)"
+consistent!(rural_3, "(.*)==(.+)");
+
+// rusoto_credential-0.11.0: r"^\[([^\]]+)\]$"
+consistent!(rusoto_credential_0, r"^\[([^\]]+)\]$");
+
+// rumblebars-0.3.0: "([:blank:]*)$"
+consistent!(rumblebars_0, "([:blank:]*)$");
+
+// rumblebars-0.3.0: "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"
+consistent!(rumblebars_1, "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z");
+
+// rumblebars-0.3.0: "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"
+consistent!(
+    rumblebars_2,
+    "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"
+);
+
+// rumblebars-0.3.0: "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$"
+consistent!(rumblebars_3, "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$");
+
+// rumblebars-0.3.0: "^([:blank:]*\r?\n)(.*)"
+consistent!(rumblebars_4, "^([:blank:]*\r?\n)(.*)");
+
+// diesel_cli-1.3.1: r"(?P<stamp>[\d-]*)_hello"
+consistent!(diesel_cli_0, r"(?P<stamp>[\d-]*)_hello");
+
+// dishub-0.1.1: r"(\d+)s"
+consistent!(dishub_0, r"(\d+)s");
+
+// spreadsheet_textconv-0.1.0: r"\n"
+consistent!(spreadsheet_textconv_0, r"\n");
+
+// spreadsheet_textconv-0.1.0: r"\r"
+consistent!(spreadsheet_textconv_1, r"\r");
+
+// spreadsheet_textconv-0.1.0: r"\t"
+consistent!(spreadsheet_textconv_2, r"\t");
+
+// split_aud-0.1.0: r"DELAY (-?\d+)ms"
+consistent!(split_aud_0, r"DELAY (-?\d+)ms");
+
+// split_aud-0.1.0: r"Trim\((\d+), ?(\d+)\)"
+consistent!(split_aud_1, r"Trim\((\d+), ?(\d+)\)");
+
+// spotrust-0.0.5: r"spotify:[a-z]+:[a-zA-Z0-9]+"
+consistent!(spotrust_0, r"spotify:[a-z]+:[a-zA-Z0-9]+");
+
+// spaceslugs-0.1.0: r"[^\x00-\x7F]"
+consistent!(spaceslugs_0, r"[^\x00-\x7F]");
+
+// spaceslugs-0.1.0: r"[']+"
+consistent!(spaceslugs_1, r"[']+");
+
+// spaceslugs-0.1.0: r"\W+"
+consistent!(spaceslugs_2, r"\W+");
+
+// spaceslugs-0.1.0: r"[ ]+"
+consistent!(spaceslugs_3, r"[ ]+");
+
+// space_email_api-0.1.1: "PHPSESSID=([0-9a-f]+)"
+consistent!(space_email_api_0, "PHPSESSID=([0-9a-f]+)");
+
+// lorikeet-0.7.0: "[^0-9.,]"
+consistent!(lorikeet_0, "[^0-9.,]");
+
+// claude-0.3.0: r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$"
+consistent!(claude_0, r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$");
+
+// clam-0.1.6: r"<%=\s*(.+?)\s*%>"
+consistent!(clam_0, r"<%=\s*(.+?)\s*%>");
+
+// classifier-0.0.3: r"(\s)"
+consistent!(classifier_0, r"(\s)");
+
+// click-0.3.2: r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)"
+consistent!(click_0, r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)");
+
+// click-0.3.2: r"-----BEGIN PRIVATE KEY-----"
+consistent!(click_1, r"-----BEGIN PRIVATE KEY-----");
+
+// ultrastar-txt-0.1.2: r"#([A-Z3a-z]*):(.*)"
+consistent!(ultrastar_txt_0, r"#([A-Z3a-z]*):(.*)");
+
+// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s*$"
+consistent!(ultrastar_txt_1, "^-\\s?(-?[0-9]+)\\s*$");
+
+// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)"
+consistent!(ultrastar_txt_2, "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)");
+
+// ultrastar-txt-0.1.2: "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)"
+consistent!(
+    ultrastar_txt_3,
+    "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)"
+);
+
+// ultrastar-txt-0.1.2: "^P\\s?(-?[0-9]+)"
+consistent!(ultrastar_txt_4, "^P\\s?(-?[0-9]+)");
+
+// db-accelerate-2.0.0: r"^template\.add($|\..+$)"
+consistent!(db_accelerate_0, r"^template\.add($|\..+$)");
+
+// db-accelerate-2.0.0: r"^template\.sub($|\..+$)"
+consistent!(db_accelerate_1, r"^template\.sub($|\..+$)");
+
+// sterling-0.3.0: r"(\d+)([cegps])"
+consistent!(sterling_0, r"(\d+)([cegps])");
+
+// stache-0.2.0: r"[^\w]"
+consistent!(stache_0, r"[^\w]");
+
+// strukt-0.1.0: "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\""
+consistent!(strukt_0, "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\"");
+
+// steamid-ng-0.3.1: r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$"
+consistent!(steamid_ng_0, r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$");
+
+// steamid-ng-0.3.1: r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$"
+consistent!(
+    steamid_ng_1,
+    r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$"
+);
+
+// strscan-0.1.1: r"^\w+"
+consistent!(strscan_0, r"^\w+");
+
+// strscan-0.1.1: r"^\s+"
+consistent!(strscan_1, r"^\s+");
+
+// strscan-0.1.1: r"^\w+"
+consistent!(strscan_2, r"^\w+");
+
+// strscan-0.1.1: r"^\s+"
+consistent!(strscan_3, r"^\s+");
+
+// strscan-0.1.1: r"^(\w+)\s+"
+consistent!(strscan_4, r"^(\w+)\s+");
+
+// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"
+consistent!(tk_carbon_0, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$");
+
+// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"
+consistent!(tk_carbon_1, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$");
+
+// evalrs-0.0.10: r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?"
+consistent!(evalrs_0, r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?");
+
+// evalrs-0.0.10: r"(?m)^# "
+consistent!(evalrs_1, r"(?m)^# ");
+
+// evalrs-0.0.10: r"(?m)^\s*fn +main *\( *\)"
+consistent!(evalrs_2, r"(?m)^\s*fn +main *\( *\)");
+
+// evalrs-0.0.10: r"(extern\s+crate\s+[a-z0-9_]+\s*;)"
+consistent!(evalrs_3, r"(extern\s+crate\s+[a-z0-9_]+\s*;)");
+
+// gate_build-0.5.0: "(.*)_t([0-9]+)"
+consistent!(gate_build_0, "(.*)_t([0-9]+)");
+
+// rake-0.1.1: r"[^\P{P}-]|\s+-\s+"
+consistent!(rake_0, r"[^\P{P}-]|\s+-\s+");
+
+// rafy-0.2.1: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"
+consistent!(rafy_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*");
+
+// raven-0.2.1: r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$"
+consistent!(raven_0, r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$");
+
+// rargs-0.2.0: r"\{[[:space:]]*[^{}]*[[:space:]]*\}"
+consistent!(rargs_0, r"\{[[:space:]]*[^{}]*[[:space:]]*\}");
+
+// rargs-0.2.0: r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$"
+consistent!(rargs_1, r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$");
+
+// rargs-0.2.0: r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$"
+consistent!(rargs_2, r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$");
+
+// rargs-0.2.0: r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$"
+consistent!(
+    rargs_3,
+    r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$"
+);
+
+// rargs-0.2.0: r"(.*?)[[:space:]]+|(.*?)$"
+consistent!(rargs_4, r"(.*?)[[:space:]]+|(.*?)$");
+
+// indradb-lib-0.15.0: r"[a-zA-Z0-9]{8}"
+consistent!(indradb_lib_0, r"[a-zA-Z0-9]{8}");
+
+// fungi-lang-0.1.50: r"::"
+consistent!(fungi_lang_0, r"::");
+
+// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)"
+consistent!(nickel_0, "/hello/(?P<name>[a-zA-Z]+)");
+
+// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)"
+consistent!(nickel_1, "/hello/(?P<name>[a-zA-Z]+)");
+
+// pact_verifier-0.4.0: r"\{(\w+)\}"
+consistent!(pact_verifier_0, r"\{(\w+)\}");
+
+// pact_matching-0.4.1: "application/.*json"
+consistent!(pact_matching_0, "application/.*json");
+
+// pact_matching-0.4.1: "application/json.*"
+consistent!(pact_matching_1, "application/json.*");
+
+// pact_matching-0.4.1: "application/.*xml"
+consistent!(pact_matching_2, "application/.*xml");
+
+// pangu-0.2.0: "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])"
+consistent!(
+    pangu_0,
+    "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])"
+);
+
+// pangu-0.2.0: "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)"
+consistent!(
+    pangu_1,
+    "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)"
+);
+
+// parser-haskell-0.2.0: r"\{-[\s\S]*?-\}"
+consistent!(parser_haskell_0, r"\{-[\s\S]*?-\}");
+
+// parser-haskell-0.2.0: r"(?m);+\s*$"
+consistent!(parser_haskell_1, r"(?m);+\s*$");
+
+// parser-haskell-0.2.0: r"(?m)^#(if|ifn?def|endif|else|include|elif).*"
+consistent!(parser_haskell_2, r"(?m)^#(if|ifn?def|endif|else|include|elif).*");
+
+// parser-haskell-0.2.0: r"'([^'\\]|\\[A-Z]{1,3}|\\.)'"
+consistent!(parser_haskell_3, r"'([^'\\]|\\[A-Z]{1,3}|\\.)'");
+
+// parser-haskell-0.2.0: r"forall\s+(.*?)\."
+consistent!(parser_haskell_4, r"forall\s+(.*?)\.");
+
+// html2md-0.2.1: "\\s{2,}"
+consistent!(html2md_0, "\\s{2,}");
+
+// html2md-0.2.1: "\\n{2,}"
+consistent!(html2md_1, "\\n{2,}");
+
+// html2md-0.2.1: "(?m)(\\S) $"
+consistent!(html2md_2, "(?m)(\\S) $");
+
+// html2md-0.2.1: "(?m)^[-*] "
+consistent!(html2md_3, "(?m)^[-*] ");
+
+// ovpnfile-0.1.2: r"#.*$"
+consistent!(ovpnfile_0, r"#.*$");
+
+// ovpnfile-0.1.2: r"^<(\S+)>"
+consistent!(ovpnfile_1, r"^<(\S+)>");
+
+// ovpnfile-0.1.2: r"^</(\S+)>"
+consistent!(ovpnfile_2, r"^</(\S+)>");
+
+// screenruster-saver-fractal-0.1.1: r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})"
+consistent!(
+    screenruster_saver_fractal_0,
+    r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})"
+);
+
+// scarlet-0.2.2: r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)"
+consistent!(
+    scarlet_0,
+    r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)"
+);
+
+// cpp_to_rust_generator-0.2.0: r"^([\w:]+)<(.+)>$"
+consistent!(cpp_to_rust_generator_0, r"^([\w:]+)<(.+)>$");
+
+// cpp_to_rust_generator-0.2.0: r"^type-parameter-(\d+)-(\d+)$"
+consistent!(cpp_to_rust_generator_1, r"^type-parameter-(\d+)-(\d+)$");
+
+// cpp_to_rust_generator-0.2.0: r"^([\w~]+)<[^<>]+>$"
+consistent!(cpp_to_rust_generator_2, r"^([\w~]+)<[^<>]+>$");
+
+// cpp_to_rust_generator-0.2.0: r"(signals|Q_SIGNALS)\s*:"
+consistent!(cpp_to_rust_generator_3, r"(signals|Q_SIGNALS)\s*:");
+
+// cpp_to_rust_generator-0.2.0: r"(slots|Q_SLOTS)\s*:"
+consistent!(cpp_to_rust_generator_4, r"(slots|Q_SLOTS)\s*:");
+
+// cpp_to_rust_generator-0.2.0: r"(public|protected|private)\s*:"
+consistent!(cpp_to_rust_generator_5, r"(public|protected|private)\s*:");
+
+// cpp_to_rust-0.5.3: r"^([\w:]+)<(.+)>$"
+consistent!(cpp_to_rust_0, r"^([\w:]+)<(.+)>$");
+
+// cpp_to_rust-0.5.3: r"^type-parameter-(\d+)-(\d+)$"
+consistent!(cpp_to_rust_1, r"^type-parameter-(\d+)-(\d+)$");
+
+// cpp_to_rust-0.5.3: r"^([\w~]+)<[^<>]+>$"
+consistent!(cpp_to_rust_2, r"^([\w~]+)<[^<>]+>$");
+
+// cpp_to_rust-0.5.3: r"(signals|Q_SIGNALS)\s*:"
+consistent!(cpp_to_rust_3, r"(signals|Q_SIGNALS)\s*:");
+
+// cpp_to_rust-0.5.3: r"(slots|Q_SLOTS)\s*:"
+consistent!(cpp_to_rust_4, r"(slots|Q_SLOTS)\s*:");
+
+// cpp_to_rust-0.5.3: r"(public|protected|private)\s*:"
+consistent!(cpp_to_rust_5, r"(public|protected|private)\s*:");
+
+// fritzbox_logs-0.2.0: "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)"
+consistent!(
+    fritzbox_logs_0,
+    "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)"
+);
+
+// fractal-matrix-api-3.29.0: r"mxc://(?P<server>[^/]+)/(?P<media>.+)"
+consistent!(fractal_matrix_api_0, r"mxc://(?P<server>[^/]+)/(?P<media>.+)");
+
+// smtp2go-0.1.4: r"^api-[a-zA-Z0-9]{32}$"
+consistent!(smtp2go_0, r"^api-[a-zA-Z0-9]{32}$");
+
+// pusher-0.3.1: r"^[-a-zA-Z0-9_=@,.;]+$"
+consistent!(pusher_0, r"^[-a-zA-Z0-9_=@,.;]+$");
+
+// pusher-0.3.1: r"\A\d+\.\d+\z"
+consistent!(pusher_1, r"\A\d+\.\d+\z");
+
+// bakervm-0.9.0: r"^\.(.+?) +?(.+)$"
+consistent!(bakervm_0, r"^\.(.+?) +?(.+)$");
+
+// bakervm-0.9.0: r"^\.([^\s]+)$"
+consistent!(bakervm_1, r"^\.([^\s]+)$");
+
+// bakervm-0.9.0: r"^include! +([^\s]+)$"
+consistent!(bakervm_2, r"^include! +([^\s]+)$");
+
+// bakervm-0.9.0: r"^@(\d+)$"
+consistent!(bakervm_3, r"^@(\d+)$");
+
+// bakervm-0.9.0: r"^true|false$"
+consistent!(bakervm_4, r"^true|false$");
+
+// bakervm-0.9.0: r"^(-?\d+)?\.[0-9]+$"
+consistent!(bakervm_5, r"^(-?\d+)?\.[0-9]+$");
+
+// bakervm-0.9.0: r"^(-?\d+)?$"
+consistent!(bakervm_6, r"^(-?\d+)?$");
+
+// bakervm-0.9.0: r"^#([0-9abcdefABCDEF]{6})$"
+consistent!(bakervm_7, r"^#([0-9abcdefABCDEF]{6})$");
+
+// bakervm-0.9.0: r"^'(.)'$"
+consistent!(bakervm_8, r"^'(.)'$");
+
+// bakervm-0.9.0: r"^\$vi\((\d+)\)$"
+consistent!(bakervm_9, r"^\$vi\((\d+)\)$");
+
+// bakervm-0.9.0: r"^\$key\((\d+)\)$"
+consistent!(bakervm_10, r"^\$key\((\d+)\)$");
+
+// banana-0.0.2: "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)"
+consistent!(
+    banana_0,
+    "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)"
+);
+
+// serial-key-2.0.0: r"[A-F0-9]{8}"
+consistent!(serial_key_0, r"[A-F0-9]{8}");
+
+// serde-hjson-0.8.1: "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"
+consistent!(serde_hjson_0, "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]");
+
+// serde-hjson-0.8.1: "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"
+consistent!(serde_hjson_1, "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]");
+
+// serde-hjson-0.8.1: "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"
+consistent!(serde_hjson_2, "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]");
+
+// serde-odbc-0.1.0: r"/todos/(?P<id>\d+)"
+consistent!(serde_odbc_0, r"/todos/(?P<id>\d+)");
+
+// sentry-0.6.0: r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)"
+consistent!(sentry_0, r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)");
+
+// sentiment-0.1.1: r"[^a-zA-Z0 -]+"
+consistent!(sentiment_0, r"[^a-zA-Z0 -]+");
+
+// sentiment-0.1.1: r" {2,}"
+consistent!(sentiment_1, r" {2,}");
+
+// verilog-0.0.1: r"(?m)//.*"
+consistent!(verilog_0, r"(?m)//.*");
+
+// verex-0.2.2: "(?P<robot>C3PO)"
+consistent!(verex_0, "(?P<robot>C3PO)");
+
+// handlebars-0.32.4: ">|<|\"|&"
+consistent!(handlebars_0, ">|<|\"|&");
+
+// haikunator-0.1.2: r"^\w+-\w+-[0123456789]{4}$"
+consistent!(haikunator_0, r"^\w+-\w+-[0123456789]{4}$");
+
+// haikunator-0.1.2: r"^\w+@\w+@[0123456789]{4}$"
+consistent!(haikunator_1, r"^\w+@\w+@[0123456789]{4}$");
+
+// haikunator-0.1.2: r"^\w+-\w+-[0123456789abcdef]{4}$"
+consistent!(haikunator_2, r"^\w+-\w+-[0123456789abcdef]{4}$");
+
+// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$"
+consistent!(haikunator_3, r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$");
+
+// haikunator-0.1.2: r"^\w+-\w+$"
+consistent!(haikunator_4, r"^\w+-\w+$");
+
+// haikunator-0.1.2: r"^\w+-\w+-[foo]{4}$"
+consistent!(haikunator_5, r"^\w+-\w+-[foo]{4}$");
+
+// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$"
+consistent!(haikunator_6, r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$");
+
+// bobbin-cli-0.8.3: r"(.*)"
+consistent!(bobbin_cli_0, r"(.*)");
+
+// bobbin-cli-0.8.3: r"rustc (.*)"
+consistent!(bobbin_cli_1, r"rustc (.*)");
+
+// bobbin-cli-0.8.3: r"cargo (.*)"
+consistent!(bobbin_cli_2, r"cargo (.*)");
+
+// bobbin-cli-0.8.3: r"xargo (.*)\n"
+consistent!(bobbin_cli_3, r"xargo (.*)\n");
+
+// bobbin-cli-0.8.3: r"Open On-Chip Debugger (.*)"
+consistent!(bobbin_cli_4, r"Open On-Chip Debugger (.*)");
+
+// bobbin-cli-0.8.3: r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)"
+consistent!(
+    bobbin_cli_5,
+    r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)"
+);
+
+// bobbin-cli-0.8.3: r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n"
+consistent!(
+    bobbin_cli_6,
+    r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n"
+);
+
+// bobbin-cli-0.8.3: r"(?m)SEGGER J-Link Commander (.*)\n"
+consistent!(bobbin_cli_7, r"(?m)SEGGER J-Link Commander (.*)\n");
+
+// bobbin-cli-0.8.3: r"(?m)Teensy Loader, Command Line, Version (.*)\n"
+consistent!(bobbin_cli_8, r"(?m)Teensy Loader, Command Line, Version (.*)\n");
+
+// bobbin-cli-0.8.3: r"dfu-util (.*)\n"
+consistent!(bobbin_cli_9, r"dfu-util (.*)\n");
+
+// borsholder-0.9.1: r"^/static/[\w.]+$"
+consistent!(borsholder_0, r"^/static/[\w.]+$");
+
+// borsholder-0.9.1: r"^/timeline/([0-9]+)$"
+consistent!(borsholder_1, r"^/timeline/([0-9]+)$");
+
+// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]"
+consistent!(fblog_0, "\u{001B}\\[[\\d;]*[^\\d;]");
+
+// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]"
+consistent!(fblog_1, "\u{001B}\\[[\\d;]*[^\\d;]");
+
+// toml-query-0.6.0: r"^\[\d+\]$"
+consistent!(toml_query_0, r"^\[\d+\]$");
+
+// todo-txt-1.1.0: r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)"
+consistent!(todo_txt_0, r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)");
+
+// findr-0.1.5: r"\band\b"
+consistent!(findr_0, r"\band\b");
+
+// findr-0.1.5: r"\bor\b"
+consistent!(findr_1, r"\bor\b");
+
+// findr-0.1.5: r"\bnot\b"
+consistent!(findr_2, r"\bnot\b");
+
+// file-sniffer-3.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"
+consistent!(file_sniffer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$");
+
+// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*|dat|pc|info)$"
+consistent!(file_sniffer_1, r".*?\.(stats|conf|h|cache.*|dat|pc|info)$");
+
+// file-sniffer-3.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"
+consistent!(file_sniffer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$");
+
+// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*)$"
+consistent!(file_sniffer_3, r".*?\.(stats|conf|h|cache.*)$");
+
+// file-sniffer-3.0.1: r"(\.git|\.pijul|_darcs|\.hg)$"
+consistent!(file_sniffer_4, r"(\.git|\.pijul|_darcs|\.hg)$");
+
+// file_logger-0.1.0: "test"
+consistent!(file_logger_0, "test");
+
+// file_scanner-0.2.0: r"foo"
+consistent!(file_scanner_0, r"foo");
+
+// file_scanner-0.2.0: r"a+b"
+consistent!(file_scanner_1, r"a+b");
+
+// file_scanner-0.2.0: r"a[ab]*b"
+consistent!(file_scanner_2, r"a[ab]*b");
+
+// file_scanner-0.2.0: r"\s+"
+consistent!(file_scanner_3, r"\s+");
+
+// file_scanner-0.2.0: r"\s+"
+consistent!(file_scanner_4, r"\s+");
+
+// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$"
+consistent!(cellsplit_0, r"^\s*([^\s]+) %cellsplit<\d+>$");
+
+// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$"
+consistent!(cellsplit_1, r"^\s*([^\s]+) %cellsplit<\d+>$");
+
+// aterm-0.20.0: r"^[+\-]?[0-9]+"
+consistent!(aterm_0, r"^[+\-]?[0-9]+");
+
+// aterm-0.20.0: r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?"
+consistent!(aterm_1, r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?");
+
+// atarashii_imap-0.3.0: r"^[*] OK"
+consistent!(atarashii_imap_0, r"^[*] OK");
+
+// atarashii_imap-0.3.0: r"FLAGS\s\((.+)\)"
+consistent!(atarashii_imap_1, r"FLAGS\s\((.+)\)");
+
+// atarashii_imap-0.3.0: r"\[PERMANENTFLAGS\s\((.+)\)\]"
+consistent!(atarashii_imap_2, r"\[PERMANENTFLAGS\s\((.+)\)\]");
+
+// atarashii_imap-0.3.0: r"\[UIDVALIDITY\s(\d+)\]"
+consistent!(atarashii_imap_3, r"\[UIDVALIDITY\s(\d+)\]");
+
+// atarashii_imap-0.3.0: r"(\d+)\sEXISTS"
+consistent!(atarashii_imap_4, r"(\d+)\sEXISTS");
+
+// atarashii_imap-0.3.0: r"(\d+)\sRECENT"
+consistent!(atarashii_imap_5, r"(\d+)\sRECENT");
+
+// atarashii_imap-0.3.0: r"\[UNSEEN\s(\d+)\]"
+consistent!(atarashii_imap_6, r"\[UNSEEN\s(\d+)\]");
+
+// atarashii_imap-0.3.0: r"\[UIDNEXT\s(\d+)\]"
+consistent!(atarashii_imap_7, r"\[UIDNEXT\s(\d+)\]");
+
+// editorconfig-1.0.0: r"\\(\{|\})"
+consistent!(editorconfig_0, r"\\(\{|\})");
+
+// editorconfig-1.0.0: r"(^|[^\\])\\\|"
+consistent!(editorconfig_1, r"(^|[^\\])\\\|");
+
+// editorconfig-1.0.0: r"\[([^\]]*)$"
+consistent!(editorconfig_2, r"\[([^\]]*)$");
+
+// editorconfig-1.0.0: r"\[(.*/.*)\]"
+consistent!(editorconfig_3, r"\[(.*/.*)\]");
+
+// editorconfig-1.0.0: r"\{(-?\d+\\\.\\\.-?\d+)\}"
+consistent!(editorconfig_4, r"\{(-?\d+\\\.\\\.-?\d+)\}");
+
+// editorconfig-1.0.0: r"\{([^,]+)\}"
+consistent!(editorconfig_5, r"\{([^,]+)\}");
+
+// editorconfig-1.0.0: r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}"
+consistent!(editorconfig_6, r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}");
+
+// editorconfig-1.0.0: r"^/"
+consistent!(editorconfig_7, r"^/");
+
+// editorconfig-1.0.0: r"(^|[^\\])(\{|\})"
+consistent!(editorconfig_8, r"(^|[^\\])(\{|\})");
+
+// edmunge-1.0.0: "^#!.*\n"
+consistent!(edmunge_0, "^#!.*\n");
+
+// unicode_names2_macros-0.2.0: r"\\N\{(.*?)(?:\}|$)"
+consistent!(unicode_names2_macros_0, r"\\N\{(.*?)(?:\}|$)");
+
+// unidiff-0.2.1: r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?"
+consistent!(
+    unidiff_0,
+    r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?"
+);
+
+// unidiff-0.2.1: r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?"
+consistent!(
+    unidiff_1,
+    r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?"
+);
+
+// unidiff-0.2.1: r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)"
+consistent!(unidiff_2, r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)");
+
+// unidiff-0.2.1: r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)"
+consistent!(unidiff_3, r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)");
+
+// slippy-map-tiles-0.13.1: "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$"
+consistent!(slippy_map_tiles_0, "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$");
+
+// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"
+consistent!(slippy_map_tiles_1, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$");
+
+// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"
+consistent!(slippy_map_tiles_2, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$");
+
+// sonos-0.1.2: r"^https?://(.+?):1400/xml"
+consistent!(sonos_0, r"^https?://(.+?):1400/xml");
+
+// validator_derive-0.7.0: r"^[a-z]{2}$"
+consistent!(validator_derive_0, r"^[a-z]{2}$");
+
+// validator_derive-0.7.0: r"[a-z]{2}"
+consistent!(validator_derive_1, r"[a-z]{2}");
+
+// validator_derive-0.7.0: r"[a-z]{2}"
+consistent!(validator_derive_2, r"[a-z]{2}");
+
+// nginx-config-0.8.0: r"one of \d+ options"
+consistent!(nginx_config_0, r"one of \d+ options");
+
+// waltz-0.4.0: r"[\s,]"
+consistent!(waltz_0, r"[\s,]");
+
+// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)"
+consistent!(warheadhateus_0, r"^aws_access_key_id = (.*)");
+
+// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)"
+consistent!(warheadhateus_1, r"^aws_secret_access_key = (.*)");
+
+// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)"
+consistent!(warheadhateus_2, r"^aws_access_key_id = (.*)");
+
+// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)"
+consistent!(warheadhateus_3, r"^aws_secret_access_key = (.*)");
+
+// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)"
+consistent!(jieba_rs_0, r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)");
+
+// jieba-rs-0.2.2: r"(\r\n|\s)"
+consistent!(jieba_rs_1, r"(\r\n|\s)");
+
+// jieba-rs-0.2.2: "([\u{4E00}-\u{9FD5}]+)"
+consistent!(jieba_rs_2, "([\u{4E00}-\u{9FD5}]+)");
+
+// jieba-rs-0.2.2: r"[^a-zA-Z0-9+#\n]"
+consistent!(jieba_rs_3, r"[^a-zA-Z0-9+#\n]");
+
+// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}]+)"
+consistent!(jieba_rs_4, r"([\u{4E00}-\u{9FD5}]+)");
+
+// jieba-rs-0.2.2: r"([a-zA-Z0-9]+(?:.\d+)?%?)"
+consistent!(jieba_rs_5, r"([a-zA-Z0-9]+(?:.\d+)?%?)");
+
+// lalrpop-0.15.2: r"Span\([0-9 ,]*\)"
+consistent!(lalrpop_0, r"Span\([0-9 ,]*\)");
+
+// lalrpop-snap-0.15.2: r"Span\([0-9 ,]*\)"
+consistent!(lalrpop_snap_0, r"Span\([0-9 ,]*\)");
+
+// nlp-tokenize-0.1.0: r"[\S]+"
+consistent!(nlp_tokenize_0, r"[\S]+");
+
+// kbgpg-0.1.2: "[[:xdigit:]][70]"
+consistent!(kbgpg_0, "[[:xdigit:]][70]");
+
+// cdbd-0.1.1: r"^((?P<address>.*):)?(?P<port>\d+)$"
+consistent!(cdbd_0, r"^((?P<address>.*):)?(?P<port>\d+)$");
+
+// mbutiles-0.1.1: r"[\w\s=+-/]+\((\{(.|\n)*\})\);?"
+consistent!(mbutiles_0, r"[\w\s=+-/]+\((\{(.|\n)*\})\);?");
+
+// extrahop-0.2.5: r"^-\d+(?:ms|s|m|h|d|w|y)?$"
+consistent!(extrahop_0, r"^-\d+(?:ms|s|m|h|d|w|y)?$");
+
+// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"
+consistent!(pippin_0, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$");
+
+// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$"
+consistent!(
+    pippin_1,
+    "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$"
+);
+
+// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"
+consistent!(pippin_2, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$");
+
+// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$"
+consistent!(
+    pippin_3,
+    "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$"
+);
+
+// pippin-0.1.0: "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$"
+consistent!(pippin_4, "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$");
+
+// pippin-0.1.0: "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$"
+consistent!(
+    pippin_5,
+    "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$"
+);
+
+// pinyin-0.3.0: r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]"
+consistent!(
+    pinyin_0,
+    r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]"
+);
+
+// pinyin-0.3.0: r"([aeoiuvnm])([0-4])$"
+consistent!(pinyin_1, r"([aeoiuvnm])([0-4])$");
+
+// duration-parser-0.2.0: r"(?P<value>\d+)(?P<units>[a-z])"
+consistent!(duration_parser_0, r"(?P<value>\d+)(?P<units>[a-z])");
+
+// dutree-0.2.7: r"^\d+\D?$"
+consistent!(dutree_0, r"^\d+\D?$");
+
+// djangohashers-0.3.0: r"^[A-Za-z0-9]*$"
+consistent!(djangohashers_0, r"^[A-Za-z0-9]*$");
+
+// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}$"
+consistent!(rtag_0, r"^[A-Z][A-Z0-9]{2,}$");
+
+// rtag-0.3.5: r"^http://www\.emusic\.com"
+consistent!(rtag_1, r"^http://www\.emusic\.com");
+
+// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}"
+consistent!(rtag_2, r"^[A-Z][A-Z0-9]{2,}");
+
+// rtag-0.3.5: r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)"
+consistent!(
+    rtag_3,
+    r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)"
+);
+
+// rtow-0.1.0: r"(\d+)[xX](\d+)"
+consistent!(rtow_0, r"(\d+)[xX](\d+)");
+
+// pleingres-sql-plugin-0.1.0: r"\$([a-zA-Z0-9_]+)"
+consistent!(pleingres_sql_plugin_0, r"\$([a-zA-Z0-9_]+)");
+
+// dono-2.0.0: "[\\n]+"
+consistent!(dono_0, "[\\n]+");
+
+// dono-2.0.0: "(?m)^\\n"
+consistent!(dono_1, "(?m)^\\n");
+
+// dono-2.0.0: "(?m)^\\n"
+consistent!(dono_2, "(?m)^\\n");
+
+// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.ed25519$"
+consistent!(ssb_common_0, r"^[0-9A-Za-z\+/]{43}=\.ed25519$");
+
+// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{86}==\.ed25519$"
+consistent!(ssb_common_1, r"^[0-9A-Za-z\+/]{86}==\.ed25519$");
+
+// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.sha256$"
+consistent!(ssb_common_2, r"^[0-9A-Za-z\+/]{43}=\.sha256$");
+
+// mozversion-0.1.3: r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$"
+consistent!(mozversion_0, r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$");
+
+// monger-0.5.6: r"^(\d+)\.(\d+)$"
+consistent!(monger_0, r"^(\d+)\.(\d+)$");
+
+// mongo_rub-0.0.2: r"^[rv]2\.6"
+consistent!(mongo_rub_0, r"^[rv]2\.6");
+
+// flow-0.3.5: "body value"
+consistent!(flow_0, "body value");
+
+// flow-0.3.5: "start marker"
+consistent!(flow_1, "start marker");
+
+// flow-0.3.5: "end marker"
+consistent!(flow_2, "end marker");
+
+// flow-0.3.5: "body value"
+consistent!(flow_3, "body value");
+
+// vobsub-0.2.3: "^([A-Za-z/ ]+): (.*)"
+consistent!(vobsub_0, "^([A-Za-z/ ]+): (.*)");
+
+// voidmap-1.1.2: r"#([^\s=]+)*"
+consistent!(voidmap_0, r"#([^\s=]+)*");
+
+// voidmap-1.1.2: r"#(\S+)*"
+consistent!(voidmap_1, r"#(\S+)*");
+
+// voidmap-1.1.2: r"#prio=(\d+)"
+consistent!(voidmap_2, r"#prio=(\d+)");
+
+// voidmap-1.1.2: r"\[(\S+)\]"
+consistent!(voidmap_3, r"\[(\S+)\]");
+
+// voidmap-1.1.2: r"#limit=(\d+)"
+consistent!(voidmap_4, r"#limit=(\d+)");
+
+// voidmap-1.1.2: r"#tagged=(\S+)"
+consistent!(voidmap_5, r"#tagged=(\S+)");
+
+// voidmap-1.1.2: r"#rev\b"
+consistent!(voidmap_6, r"#rev\b");
+
+// voidmap-1.1.2: r"#done\b"
+consistent!(voidmap_7, r"#done\b");
+
+// voidmap-1.1.2: r"#open\b"
+consistent!(voidmap_8, r"#open\b");
+
+// voidmap-1.1.2: r"#since=(\S+)"
+consistent!(voidmap_9, r"#since=(\S+)");
+
+// voidmap-1.1.2: r"#until=(\S+)"
+consistent!(voidmap_10, r"#until=(\S+)");
+
+// voidmap-1.1.2: r"#plot=(\S+)"
+consistent!(voidmap_11, r"#plot=(\S+)");
+
+// voidmap-1.1.2: r"#n=(\d+)"
+consistent!(voidmap_12, r"#n=(\d+)");
+
+// voidmap-1.1.2: r"(\S+)"
+consistent!(voidmap_13, r"(\S+)");
+
+// voidmap-1.1.2: r"(?P<y>\d+)y"
+consistent!(voidmap_14, r"(?P<y>\d+)y");
+
+// voidmap-1.1.2: r"(?P<m>\d+)m"
+consistent!(voidmap_15, r"(?P<m>\d+)m");
+
+// voidmap-1.1.2: r"(?P<w>\d+)w"
+consistent!(voidmap_16, r"(?P<w>\d+)w");
+
+// voidmap-1.1.2: r"(?P<d>\d+)d"
+consistent!(voidmap_17, r"(?P<d>\d+)d");
+
+// voidmap-1.1.2: r"(?P<h>\d+)h"
+consistent!(voidmap_18, r"(?P<h>\d+)h");
+
+// voidmap-1.1.2: r"C-(.)"
+consistent!(voidmap_19, r"C-(.)");
+
+// qt_generator-0.2.0: r"^\.\./qt[^/]+/"
+consistent!(qt_generator_0, r"^\.\./qt[^/]+/");
+
+// qt_generator-0.2.0: "(href|src)=\"([^\"]*)\""
+consistent!(qt_generator_1, "(href|src)=\"([^\"]*)\"");
+
+// kryptos-0.6.1: r"[01]{5}"
+consistent!(kryptos_0, r"[01]{5}");
+
+// cifar_10_loader-0.2.0: "data_batch_[1-5].bin"
+consistent!(cifar_10_loader_0, "data_batch_[1-5].bin");
+
+// cifar_10_loader-0.2.0: "test_batch.bin"
+consistent!(cifar_10_loader_1, "test_batch.bin");
+
+// circadian-0.6.0: r"^\d+.\d+s$"
+consistent!(circadian_0, r"^\d+.\d+s$");
+
+// circadian-0.6.0: r"^\d+:\d+$"
+consistent!(circadian_1, r"^\d+:\d+$");
+
+// circadian-0.6.0: r"^\d+:\d+m$"
+consistent!(circadian_2, r"^\d+:\d+m$");
+
+// cicada-0.8.1: r"!!"
+consistent!(cicada_0, r"!!");
+
+// cicada-0.8.1: r"^([^`]*)`([^`]+)`(.*)$"
+consistent!(cicada_1, r"^([^`]*)`([^`]+)`(.*)$");
+
+// cicada-0.8.1: r"\*+"
+consistent!(cicada_2, r"\*+");
+
+// cicada-0.8.1: r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)"
+consistent!(cicada_3, r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)");
+
+// cicada-0.8.1: r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$"
+consistent!(cicada_4, r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$");
+
+// vterm-sys-0.1.0: r"hi"
+consistent!(vterm_sys_0, r"hi");
+
+// skim-0.5.0: r".*?\t"
+consistent!(skim_0, r".*?\t");
+
+// skim-0.5.0: r".*?[\t ]"
+consistent!(skim_1, r".*?[\t ]");
+
+// skim-0.5.0: r"(\{-?[0-9.,q]*?})"
+consistent!(skim_2, r"(\{-?[0-9.,q]*?})");
+
+// skim-0.5.0: r"[ \t\n]+"
+consistent!(skim_3, r"[ \t\n]+");
+
+// skim-0.5.0: r"[ \t\n]+"
+consistent!(skim_4, r"[ \t\n]+");
+
+// skim-0.5.0: r"([^ |]+( +\| +[^ |]*)+)|( +)"
+consistent!(skim_5, r"([^ |]+( +\| +[^ |]*)+)|( +)");
+
+// skim-0.5.0: r" +\| +"
+consistent!(skim_6, r" +\| +");
+
+// skim-0.5.0: r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$"
+consistent!(skim_7, r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$");
+
+// skim-0.5.0: ","
+consistent!(skim_8, ",");
+
+// skim-0.5.0: ".*?,"
+consistent!(skim_9, ".*?,");
+
+// skim-0.5.0: ".*?,"
+consistent!(skim_10, ".*?,");
+
+// skim-0.5.0: ","
+consistent!(skim_11, ",");
+
+// skim-0.5.0: r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))"
+consistent!(skim_12, r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))");
+
+// egg-mode-text-1.14.7: r"[-_./]\z"
+consistent!(egg_mode_text_0, r"[-_./]\z");
+
+// java-properties-1.1.1: "^[ \t\r\n\x0c]*[#!]"
+consistent!(java_properties_0, "^[ \t\r\n\x0c]*[#!]");
+
+// java-properties-1.1.1: r"^[ \t\x0c]*[#!][^\r\n]*$"
+consistent!(java_properties_1, r"^[ \t\x0c]*[#!][^\r\n]*$");
+
+// java-properties-1.1.1: r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$"
+consistent!(java_properties_2, r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$");
+
+// ipaddress-0.1.2: r":.+\."
+consistent!(ipaddress_0, r":.+\.");
+
+// ipaddress-0.1.2: r"\."
+consistent!(ipaddress_1, r"\.");
+
+// ipaddress-0.1.2: r":"
+consistent!(ipaddress_2, r":");
+
+// iptables-0.2.2: r"v(\d+)\.(\d+)\.(\d+)"
+consistent!(iptables_0, r"v(\d+)\.(\d+)\.(\d+)");
+
+// rsure-0.8.1: r"^([^-]+)-(.*)\.dat\.gz$"
+consistent!(rsure_0, r"^([^-]+)-(.*)\.dat\.gz$");
+
+// rs-jsonpath-0.1.0: "^(.*?)(<=|<|==|>=|>)(.*?)$"
+consistent!(rs_jsonpath_0, "^(.*?)(<=|<|==|>=|>)(.*?)$");
+
+// oatie-0.3.0: r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))"
+consistent!(oatie_0, r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))");
+
+// weld-0.2.0: "#.*$"
+consistent!(weld_0, "#.*$");
+
+// weld-0.2.0: r"^[A-Za-z$_][A-Za-z0-9$_]*$"
+consistent!(weld_1, r"^[A-Za-z$_][A-Za-z0-9$_]*$");
+
+// weld-0.2.0: r"^[0-9]+[cC]$"
+consistent!(weld_2, r"^[0-9]+[cC]$");
+
+// weld-0.2.0: r"^0b[0-1]+[cC]$"
+consistent!(weld_3, r"^0b[0-1]+[cC]$");
+
+// weld-0.2.0: r"^0x[0-9a-fA-F]+[cC]$"
+consistent!(weld_4, r"^0x[0-9a-fA-F]+[cC]$");
+
+// weld-0.2.0: r"^[0-9]+$"
+consistent!(weld_5, r"^[0-9]+$");
+
+// weld-0.2.0: r"^0b[0-1]+$"
+consistent!(weld_6, r"^0b[0-1]+$");
+
+// weld-0.2.0: r"^0x[0-9a-fA-F]+$"
+consistent!(weld_7, r"^0x[0-9a-fA-F]+$");
+
+// weld-0.2.0: r"^[0-9]+[lL]$"
+consistent!(weld_8, r"^[0-9]+[lL]$");
+
+// weld-0.2.0: r"^0b[0-1]+[lL]$"
+consistent!(weld_9, r"^0b[0-1]+[lL]$");
+
+// weld-0.2.0: r"^0x[0-9a-fA-F]+[lL]$"
+consistent!(weld_10, r"^0x[0-9a-fA-F]+[lL]$");
+
+// webgl_generator-0.1.0: "([(, ])enum\\b"
+consistent!(webgl_generator_0, "([(, ])enum\\b");
+
+// webgl_generator-0.1.0: "\\bAcquireResourcesCallback\\b"
+consistent!(webgl_generator_1, "\\bAcquireResourcesCallback\\b");
+
+// weave-0.2.0: r"^(\d+)(,(\d+))?([acd]).*$"
+consistent!(weave_0, r"^(\d+)(,(\d+))?([acd]).*$");
+
+// wemo-0.0.12: r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>"
+consistent!(wemo_0, r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>");
+
+// webscale-0.9.4: r"(http[s]?://[^\s]+)"
+consistent!(webscale_0, r"(http[s]?://[^\s]+)");
+
+// svgrep-1.1.0: r"^\d+.*$"
+consistent!(svgrep_0, r"^\d+.*$");
+
+// ignore-0.4.2: r"^[\pL\pN]+$"
+consistent!(ignore_0, r"^[\pL\pN]+$");
+
+// ommui_string_patterns-0.1.2: r"^([A-Za-z][0-9A-Za-z_]*)?$"
+consistent!(ommui_string_patterns_0, r"^([A-Za-z][0-9A-Za-z_]*)?$");
+
+// ommui_string_patterns-0.1.2: r"^(\S+(?:.*\S)?)?$"
+consistent!(ommui_string_patterns_1, r"^(\S+(?:.*\S)?)?$");
+
+// opcua-types-0.3.0: "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$"
+consistent!(opcua_types_0, "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$");
+
+// opcua-types-0.3.0: r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$"
+consistent!(opcua_types_1, r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$");
+
+// open_read_later-1.1.1: r"^(.+?)\s*:\s*(.+)$"
+consistent!(open_read_later_0, r"^(.+?)\s*:\s*(.+)$");
+
+// youtube-downloader-0.1.0: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"
+consistent!(youtube_downloader_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*");
+
+// yobot-0.1.1: "."
+consistent!(yobot_0, ".");
+
+// yobot-0.1.1: r"."
+consistent!(yobot_1, r".");
+
+// yobot-0.1.1: r".+"
+consistent!(yobot_2, r".+");
+
+// yobot-0.1.1: r"."
+consistent!(yobot_3, r".");
+
+// ubiquity-0.1.5: r"foo"
+consistent!(ubiquity_0, r"foo");
+
+// ubiquity-0.1.5: r"/target/"
+consistent!(ubiquity_1, r"/target/");
+
+// ubiquity-0.1.5: r".DS_Store"
+consistent!(ubiquity_2, r".DS_Store");
+
+// qasm-1.0.0: r"//.*"
+consistent!(qasm_0, r"//.*");
+
+// drill-0.3.5: r"\{\{ *([a-z\._]+) *\}\}"
+consistent!(drill_0, r"\{\{ *([a-z\._]+) *\}\}");
+
+// queryst-2.0.0: r"^([^\]\[]+)"
+consistent!(queryst_0, r"^([^\]\[]+)");
+
+// queryst-2.0.0: r"(\[[^\]\[]*\])"
+consistent!(queryst_1, r"(\[[^\]\[]*\])");
+
+// qui-vive-0.1.0: r"^/(\w+)$"
+consistent!(qui_vive_0, r"^/(\w+)$");
+
+// qui-vive-0.1.0: r"^/key$"
+consistent!(qui_vive_1, r"^/key$");
+
+// qui-vive-0.1.0: r"^/key/(\w+)$"
+consistent!(qui_vive_2, r"^/key/(\w+)$");
+
+// qui-vive-0.1.0: r"^/url$"
+consistent!(qui_vive_3, r"^/url$");
+
+// qui-vive-0.1.0: r"^/url/(\w+)$"
+consistent!(qui_vive_4, r"^/url/(\w+)$");
+
+// qui-vive-0.1.0: r"^/inv$"
+consistent!(qui_vive_5, r"^/inv$");
+
+// qui-vive-0.1.0: r"^/inv/(\w+)$"
+consistent!(qui_vive_6, r"^/inv/(\w+)$");
+
+// subdiff-0.1.0: r"\b"
+// consistent!(subdiff_0, r"\b");
+
+// substudy-0.4.5: r"^(\d+)/(\d+)$"
+consistent!(substudy_0, r"^(\d+)/(\d+)$");
+
+// substudy-0.4.5: r"\s+"
+consistent!(substudy_1, r"\s+");
+
+// substudy-0.4.5: r"<[a-z/][^>]*>"
+consistent!(substudy_2, r"<[a-z/][^>]*>");
+
+// substudy-0.4.5: r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)"
+consistent!(substudy_3, r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)");
+
+// substudy-0.4.5: r"\s+"
+consistent!(substudy_4, r"\s+");
+
+// isbnid-0.1.3: r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$"
+consistent!(isbnid_0, r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$");
+
+// isbnid-0.1.3: r"[^0-9X]"
+consistent!(isbnid_1, r"[^0-9X]");
+
+// ispc-0.3.5: r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)"
+consistent!(
+    ispc_0,
+    r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)"
+);
diff --git a/tests/crazy.rs b/tests/crazy.rs
new file mode 100644
index 0000000..8c72273
--- /dev/null
+++ b/tests/crazy.rs
@@ -0,0 +1,446 @@
+mat!(ascii_literal, r"a", "a", Some((0, 1)));
+
+// Some crazy expressions from regular-expressions.info.
+mat!(
+    match_ranges,
+    r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+    "num: 255",
+    Some((5, 8))
+);
+mat!(
+    match_ranges_not,
+    r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+    "num: 256",
+    None
+);
+mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
+mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
+mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
+mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
+mat!(
+    match_email,
+    r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+    "mine is jam.slam@gmail.com ",
+    Some((8, 26))
+);
+mat!(
+    match_email_not,
+    r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+    "mine is jam.slam@gmail ",
+    None
+);
+mat!(
+    match_email_big,
+    r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
+    "mine is jam.slam@gmail.com ",
+    Some((8, 26))
+);
+mat!(
+    match_date1,
+    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+    "1900-01-01",
+    Some((0, 10))
+);
+mat!(
+    match_date2,
+    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+    "1900-00-01",
+    None
+);
+mat!(
+    match_date3,
+    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+    "1900-13-01",
+    None
+);
+
+// Do some crazy dancing with the start/end assertions.
+matiter!(match_start_end_empty, r"^$", "", (0, 0));
+matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
+matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
+matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
+matiter!(
+    match_start_end_empty_rep,
+    r"(?:^$)*",
+    "a\nb\nc",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4),
+    (5, 5)
+);
+matiter!(
+    match_start_end_empty_rep_rev,
+    r"(?:$^)*",
+    "a\nb\nc",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4),
+    (5, 5)
+);
+
+// Test negated character classes.
+mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
+mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
+mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
+mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
+mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
+mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
+mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
+mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
+
+// Test that repeated empty expressions don't loop forever.
+mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
+mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
+mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
+mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
+mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
+
+// Test that we handle various flavors of empty expressions.
+matiter!(match_empty1, r"", "", (0, 0));
+matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+
+// Test that the DFA can handle pathological cases.
+// (This should result in the DFA's cache being flushed too frequently, which
+// should cause it to quit and fall back to the NFA algorithm.)
+#[test]
+fn dfa_handles_pathological_case() {
+    fn ones_and_zeroes(count: usize) -> String {
+        use rand::{thread_rng, Rng};
+
+        let mut rng = thread_rng();
+        let mut s = String::new();
+        for _ in 0..count {
+            if rng.gen() {
+                s.push('1');
+            } else {
+                s.push('0');
+            }
+        }
+        s
+    }
+
+    let re = regex!(r"[01]*1[01]{20}$");
+    let text = {
+        let mut pieces = ones_and_zeroes(100_000);
+        pieces.push('1');
+        pieces.push_str(&ones_and_zeroes(20));
+        pieces
+    };
+    assert!(re.is_match(text!(&*text)));
+}
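+
+// A minimal companion sketch (not from the upstream suite): the lazy DFA
+// cache that the test above thrashes can also be bounded explicitly when
+// building a regex. The 4 KiB limit below is an arbitrary illustrative
+// value; matching still succeeds because the engine falls back to the
+// NFA-based engines once the cache is exhausted.
+#[test]
+fn dfa_cache_size_can_be_tuned() {
+    use regex::RegexBuilder;
+
+    let re = RegexBuilder::new(r"[01]*1[01]{20}$")
+        .dfa_size_limit(1 << 12) // deliberately tiny DFA cache
+        .build()
+        .unwrap();
+    // 22 digits: a '1' at index 1 followed by exactly 20 more digits.
+    assert!(re.is_match("0101010101010101010101"));
+}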
+
+#[test]
+fn nest_limit_makes_it_parse() {
+    use regex::RegexBuilder;
+
+    RegexBuilder::new(
+        r#"(?-u)
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}|
+        3(?:
+          12?[5-7]\d{2}|
+          0(?:
+            2(?:
+              [025-79]\d|
+              [348]\d{1,2}
+            )|
+            3(?:
+              [2-4]\d|
+              [56]\d?
+            )
+          )|
+          2(?:
+            1\d{2}|
+            2(?:
+              [12]\d|
+              [35]\d{1,2}|
+              4\d?
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [2356]\d|
+              4\d{1,2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2(?:
+              2\d{1,2}|
+              [47]|
+              5\d{2}
+            )
+          )|
+          5(?:
+            1\d{2}|
+            29
+          )|
+          [67]1\d{2}|
+          8(?:
+            1\d{2}|
+            2(?:
+              2\d{2}|
+              3|
+              4\d
+            )
+          )
+        )\d{3}|
+        4(?:
+          0(?:
+            2(?:
+              [09]\d|
+              7
+            )|
+            33\d{2}
+          )|
+          1\d{3}|
+          2(?:
+            1\d{2}|
+            2(?:
+              [25]\d?|
+              [348]\d|
+              [67]\d{1,2}
+            )
+          )|
+          3(?:
+            1\d{2}(?:
+              \d{2}
+            )?|
+            2(?:
+              [045]\d|
+              [236-9]\d{1,2}
+            )|
+            32\d{2}
+          )|
+          4(?:
+            [18]\d{2}|
+            2(?:
+              [2-46]\d{2}|
+              3
+            )|
+            5[25]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            2(?:
+              3\d|
+              5
+            )
+          )|
+          6(?:
+            [18]\d{2}|
+            2(?:
+              3(?:
+                \d{2}
+              )?|
+              [46]\d{1,2}|
+              5\d{2}|
+              7\d
+            )|
+            5(?:
+              3\d?|
+              4\d|
+              [57]\d{1,2}|
+              6\d{2}|
+              8
+            )
+          )|
+          71\d{2}|
+          8(?:
+            [18]\d{2}|
+            23\d{2}|
+            54\d{2}
+          )|
+          9(?:
+            [18]\d{2}|
+            2[2-5]\d{2}|
+            53\d{1,2}
+          )
+        )\d{3}|
+        5(?:
+          02[03489]\d{2}|
+          1\d{2}|
+          2(?:
+            1\d{2}|
+            2(?:
+              2(?:
+                \d{2}
+              )?|
+              [457]\d{2}
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [37](?:
+                \d{2}
+              )?|
+              [569]\d{2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2[46]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            26\d{1,2}
+          )|
+          6(?:
+            [18]\d{2}|
+            2|
+            53\d{2}
+          )|
+          7(?:
+            1|
+            24
+          )\d{2}|
+          8(?:
+            1|
+            26
+          )\d{2}|
+          91\d{2}
+        )\d{3}|
+        6(?:
+          0(?:
+            1\d{2}|
+            2(?:
+              3\d{2}|
+              4\d{1,2}
+            )
+          )|
+          2(?:
+            2[2-5]\d{2}|
+            5(?:
+              [3-5]\d{2}|
+              7
+            )|
+            8\d{2}
+          )|
+          3(?:
+            1|
+            2[3478]
+          )\d{2}|
+          4(?:
+            1|
+            2[34]
+          )\d{2}|
+          5(?:
+            1|
+            2[47]
+          )\d{2}|
+          6(?:
+            [18]\d{2}|
+            6(?:
+              2(?:
+                2\d|
+                [34]\d{2}
+              )|
+              5(?:
+                [24]\d{2}|
+                3\d|
+                5\d{1,2}
+              )
+            )
+          )|
+          72[2-5]\d{2}|
+          8(?:
+            1\d{2}|
+            2[2-5]\d{2}
+          )|
+          9(?:
+            1\d{2}|
+            2[2-6]\d{2}
+          )
+        )\d{3}|
+        7(?:
+          (?:
+            02|
+            [3-589]1|
+            6[12]|
+            72[24]
+          )\d{2}|
+          21\d{3}|
+          32
+        )\d{3}|
+        8(?:
+          (?:
+            4[12]|
+            [5-7]2|
+            1\d?
+          )|
+          (?:
+            0|
+            3[12]|
+            [5-7]1|
+            217
+          )\d
+        )\d{4}|
+        9(?:
+          [35]1|
+          (?:
+            [024]2|
+            81
+          )\d|
+          (?:
+            1|
+            [24]1
+          )\d{2}
+        )\d{3}
+        "#,
+    )
+    .build()
+    .unwrap();
+}
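+
+// A hedged companion sketch (not from the upstream suite): the pattern above
+// parses under the default nesting limit, while an artificially low
+// `nest_limit` rejects even a modestly nested pattern. The limit of 3 is an
+// arbitrary illustrative value.
+#[test]
+fn nest_limit_too_low_makes_it_fail() {
+    use regex::RegexBuilder;
+
+    let result = RegexBuilder::new(r"(((((a)))))")
+        .nest_limit(3) // well below the pattern's nesting depth
+        .build();
+    assert!(result.is_err());
+}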
diff --git a/tests/flags.rs b/tests/flags.rs
new file mode 100644
index 0000000..c33b82d
--- /dev/null
+++ b/tests/flags.rs
@@ -0,0 +1,31 @@
+mat!(match_flag_case, "(?-u)(?i)abc", "ABC", Some((0, 3)));
+mat!(match_flag_weird_case, "(?-u)(?i)a(?-i)bc", "Abc", Some((0, 3)));
+mat!(match_flag_weird_case_not, "(?-u)(?i)a(?-i)bc", "ABC", None);
+mat!(match_flag_case_dotnl, "(?-u)(?is)a(?u:.)", "A\n", Some((0, 2)));
+mat!(
+    match_flag_case_dotnl_toggle,
+    "(?-u)(?is)a(?u:.)(?-is)a(?u:.)",
+    "A\nab",
+    Some((0, 4))
+);
+mat!(
+    match_flag_case_dotnl_toggle_not,
+    "(?-u)(?is)a(?u:.)(?-is)a(?u:.)",
+    "A\na\n",
+    None
+);
+mat!(
+    match_flag_case_dotnl_toggle_ok,
+    "(?-u)(?is)a(?u:.)(?-is:a(?u:.))?",
+    "A\na\n",
+    Some((0, 2))
+);
+mat!(
+    match_flag_multi,
+    r"(?-u)(?m)(?:^\d+$\n?)+",
+    "123\n456\n789",
+    Some((0, 11))
+);
+mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1)));
+mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2)));
+mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)));
diff --git a/tests/fowler.rs b/tests/fowler.rs
new file mode 100644
index 0000000..7f56a75
--- /dev/null
+++ b/tests/fowler.rs
@@ -0,0 +1,1588 @@
+// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
+// on 2019-09-02 11:07:37.849994.
+
+// Tests from basic.dat
+mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));
+mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7)));
+mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8)));
+mat!(match_basic_6, r"\)", r"()", Some((1, 2)));
+mat!(match_basic_7, r"a]", r"a]a", Some((0, 2)));
+mat!(match_basic_9, r"\}", r"}", Some((0, 1)));
+mat!(match_basic_10, r"\]", r"]", Some((0, 1)));
+mat!(match_basic_12, r"]", r"]", Some((0, 1)));
+mat!(match_basic_15, r"^a", r"ax", Some((0, 1)));
+mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3)));
+mat!(match_basic_17, r"a\^", r"a^", Some((0, 2)));
+mat!(match_basic_18, r"a$", r"aa", Some((1, 2)));
+mat!(match_basic_19, r"a\$", r"a$", Some((0, 2)));
+mat!(match_basic_20, r"^$", r"", Some((0, 0)));
+mat!(match_basic_21, r"$^", r"", Some((0, 0)));
+mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2)));
+mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0)));
+mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4)));
+mat!(
+    match_basic_26,
+    r"(ab|a)(bc|c)",
+    r"abc",
+    Some((0, 3)),
+    Some((0, 2)),
+    Some((2, 3))
+);
+mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2)));
+mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2)));
+mat!(
+    match_basic_29,
+    r"(a*)(b?)(b+)b{3}",
+    r"aaabbbbbbb",
+    Some((0, 10)),
+    Some((0, 3)),
+    Some((3, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_30,
+    r"(a*)(b{0,1})(b{1,})b{3}",
+    r"aaabbbbbbb",
+    Some((0, 10)),
+    Some((0, 3)),
+    Some((3, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_32,
+    r"((a|a)|a)",
+    r"a",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1))
+);
+mat!(
+    match_basic_33,
+    r"(a*)(a|aa)",
+    r"aaaa",
+    Some((0, 4)),
+    Some((0, 3)),
+    Some((3, 4))
+);
+mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4)));
+mat!(
+    match_basic_35,
+    r"a(b)|c(d)|a(e)f",
+    r"aef",
+    Some((0, 3)),
+    None,
+    None,
+    Some((1, 2))
+);
+mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1)));
+mat!(
+    match_basic_38,
+    r"(a|b)c|a(b|c)",
+    r"ab",
+    Some((0, 2)),
+    None,
+    Some((1, 2))
+);
+mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2)));
+mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2)));
+mat!(
+    match_basic_41,
+    r"(.a|.b).*|.*(.a|.b)",
+    r"xa",
+    Some((0, 2)),
+    Some((0, 2))
+);
+mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2)));
+mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2)));
+mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2)));
+mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8)));
+mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9)));
+mat!(
+    match_basic_47,
+    r"(aa|aaa)*|(a|aaaaa)",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 2))
+);
+mat!(
+    match_basic_48,
+    r"(a.|.a.)*|(a|.a...)",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 2))
+);
+mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3)));
+mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4)));
+mat!(
+    match_basic_51,
+    r"(?i)(?-u)(Ab|cD)*",
+    r"aBcD",
+    Some((0, 4)),
+    Some((2, 4))
+);
+mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3)));
+mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3)));
+mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4)));
+mat!(
+    match_basic_55,
+    r":::1:::0:|:::1:1:0:",
+    r":::0:::1:::1:::0:",
+    Some((8, 17))
+);
+mat!(
+    match_basic_56,
+    r":::1:::0:|:::1:1:1:",
+    r":::0:::1:::1:::0:",
+    Some((8, 17))
+);
+mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1)));
+mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3)));
+mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3)));
+mat!(
+    match_basic_65,
+    r"
+",
+    r"
+",
+    Some((0, 1))
+);
+mat!(
+    match_basic_66,
+    r"
+",
+    r"
+",
+    Some((0, 1))
+);
+mat!(
+    match_basic_67,
+    r"[^a]",
+    r"
+",
+    Some((0, 1))
+);
+mat!(
+    match_basic_68,
+    r"
+a",
+    r"
+a",
+    Some((0, 2))
+);
+mat!(
+    match_basic_69,
+    r"(a)(b)(c)",
+    r"abc",
+    Some((0, 3)),
+    Some((0, 1)),
+    Some((1, 2)),
+    Some((2, 3))
+);
+mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3)));
+mat!(
+    match_basic_71,
+    r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)",
+    r"feb 6,",
+    Some((0, 6))
+);
+mat!(
+    match_basic_72,
+    r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)",
+    r"2/7",
+    Some((0, 3))
+);
+mat!(
+    match_basic_73,
+    r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)",
+    r"feb 1,Feb 6",
+    Some((5, 11))
+);
+mat!(
+    match_basic_74,
+    r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))",
+    r"x",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1))
+);
+mat!(
+    match_basic_75,
+    r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*",
+    r"xx",
+    Some((0, 2)),
+    Some((1, 2)),
+    Some((1, 2))
+);
+mat!(
+    match_basic_76,
+    r"a?(ab|ba)*",
+    r"ababababababababababababababababababababababababababababababababababababababababa",
+    Some((0, 81)),
+    Some((79, 81))
+);
+mat!(
+    match_basic_77,
+    r"abaa|abbaa|abbbaa|abbbbaa",
+    r"ababbabbbabbbabbbbabbbbaa",
+    Some((18, 25))
+);
+mat!(
+    match_basic_78,
+    r"abaa|abbaa|abbbaa|abbbbaa",
+    r"ababbabbbabbbabbbbabaa",
+    Some((18, 22))
+);
+mat!(
+    match_basic_79,
+    r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc",
+    r"baaabbbabac",
+    Some((7, 11))
+);
+mat!(match_basic_80, r".*", r"", Some((0, 2)));
+mat!(
+    match_basic_81,
+    r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll",
+    r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa",
+    Some((53, 57))
+);
+mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10)));
+mat!(match_basic_84, r"^", r"", Some((0, 0)));
+mat!(match_basic_85, r"$", r"", Some((0, 0)));
+mat!(match_basic_86, r"^$", r"", Some((0, 0)));
+mat!(match_basic_87, r"^a$", r"a", Some((0, 1)));
+mat!(match_basic_88, r"abc", r"abc", Some((0, 3)));
+mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4)));
+mat!(match_basic_90, r"abc", r"ababc", Some((2, 5)));
+mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3)));
+mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3)));
+mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4)));
+mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6)));
+mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4)));
+mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6)));
+mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4)));
+mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3)));
+mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3)));
+mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3)));
+mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3)));
+mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4)));
+mat!(match_basic_103, r"^", r"abc", Some((0, 0)));
+mat!(match_basic_104, r"$", r"abc", Some((3, 3)));
+mat!(match_basic_105, r"a.c", r"abc", Some((0, 3)));
+mat!(match_basic_106, r"a.c", r"axc", Some((0, 3)));
+mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5)));
+mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3)));
+mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3)));
+mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3)));
+mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2)));
+mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2)));
+mat!(match_basic_113, r"a]", r"a]", Some((0, 2)));
+mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3)));
+mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3)));
+mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3)));
+mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3)));
+mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2)));
+mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2)));
+mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3)));
+mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2)));
+mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4)));
+mat!(
+    match_basic_123,
+    r"((a))",
+    r"abc",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1))
+);
+mat!(
+    match_basic_124,
+    r"(a)b(c)",
+    r"abc",
+    Some((0, 3)),
+    Some((0, 1)),
+    Some((2, 3))
+);
+mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7)));
+mat!(match_basic_126, r"a*", r"aaa", Some((0, 3)));
+mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None);
+mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0)));
+mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None);
+mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2)));
+mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2)));
+mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3)));
+mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None);
+mat!(match_basic_138, r"a*", r"", Some((0, 0)));
+mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5)));
+mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1)));
+mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1)));
+mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1)));
+mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None);
+mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7)));
+mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3)));
+mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2)));
+mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4)));
+mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3)));
+mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2)));
+mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1)));
+mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3)));
+mat!(
+    match_basic_153,
+    r"a([bc]*)(c*d)",
+    r"abcd",
+    Some((0, 4)),
+    Some((1, 3)),
+    Some((3, 4))
+);
+mat!(
+    match_basic_154,
+    r"a([bc]+)(c*d)",
+    r"abcd",
+    Some((0, 4)),
+    Some((1, 3)),
+    Some((3, 4))
+);
+mat!(
+    match_basic_155,
+    r"a([bc]*)(c+d)",
+    r"abcd",
+    Some((0, 4)),
+    Some((1, 2)),
+    Some((2, 4))
+);
+mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7)));
+mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2)));
+mat!(
+    match_basic_158,
+    r"((a)(b)c)(d)",
+    r"abcd",
+    Some((0, 4)),
+    Some((0, 3)),
+    Some((0, 1)),
+    Some((1, 2)),
+    Some((3, 4))
+);
+mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5)));
+mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3)));
+mat!(
+    match_basic_161,
+    r"(bc+d$|ef*g.|h?i(j|k))",
+    r"effgz",
+    Some((0, 5)),
+    Some((0, 5))
+);
+mat!(
+    match_basic_162,
+    r"(bc+d$|ef*g.|h?i(j|k))",
+    r"ij",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((1, 2))
+);
+mat!(
+    match_basic_163,
+    r"(bc+d$|ef*g.|h?i(j|k))",
+    r"reffgz",
+    Some((1, 6)),
+    Some((1, 6))
+);
+mat!(
+    match_basic_164,
+    r"(((((((((a)))))))))",
+    r"a",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((0, 1))
+);
+mat!(
+    match_basic_165,
+    r"multiple words",
+    r"multiple words yeah",
+    Some((0, 14))
+);
+mat!(
+    match_basic_166,
+    r"(.*)c(.*)",
+    r"abcde",
+    Some((0, 5)),
+    Some((0, 2)),
+    Some((3, 5))
+);
+mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4)));
+mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3)));
+mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3)));
+mat!(
+    match_basic_170,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Qaddafi",
+    Some((0, 15)),
+    None,
+    Some((10, 12))
+);
+mat!(
+    match_basic_171,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Mo'ammar Gadhafi",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_172,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Kaddafi",
+    Some((0, 15)),
+    None,
+    Some((10, 12))
+);
+mat!(
+    match_basic_173,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Qadhafi",
+    Some((0, 15)),
+    None,
+    Some((10, 12))
+);
+mat!(
+    match_basic_174,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Gadafi",
+    Some((0, 14)),
+    None,
+    Some((10, 11))
+);
+mat!(
+    match_basic_175,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Mu'ammar Qadafi",
+    Some((0, 15)),
+    None,
+    Some((11, 12))
+);
+mat!(
+    match_basic_176,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Moamar Gaddafi",
+    Some((0, 14)),
+    None,
+    Some((9, 11))
+);
+mat!(
+    match_basic_177,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Mu'ammar Qadhdhafi",
+    Some((0, 18)),
+    None,
+    Some((13, 15))
+);
+mat!(
+    match_basic_178,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Khaddafi",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_179,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Ghaddafy",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_180,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Ghadafi",
+    Some((0, 15)),
+    None,
+    Some((11, 12))
+);
+mat!(
+    match_basic_181,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Ghaddafi",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_182,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muamar Kaddafi",
+    Some((0, 14)),
+    None,
+    Some((9, 11))
+);
+mat!(
+    match_basic_183,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Quathafi",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_184,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Muammar Gheddafi",
+    Some((0, 16)),
+    None,
+    Some((11, 13))
+);
+mat!(
+    match_basic_185,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Moammar Khadafy",
+    Some((0, 15)),
+    None,
+    Some((11, 12))
+);
+mat!(
+    match_basic_186,
+    r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+    r"Moammar Qudhafi",
+    Some((0, 15)),
+    None,
+    Some((10, 12))
+);
+mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4)));
+mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4)));
+mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4)));
+mat!(
+    match_basic_190,
+    r"^([^!.]+).att.com!(.+)$",
+    r"gryphon.att.com!eby",
+    Some((0, 19)),
+    Some((0, 7)),
+    Some((16, 19))
+);
+mat!(
+    match_basic_191,
+    r"^([^!]+!)?([^!]+)$",
+    r"bas",
+    Some((0, 3)),
+    None,
+    Some((0, 3))
+);
+mat!(
+    match_basic_192,
+    r"^([^!]+!)?([^!]+)$",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_193,
+    r"^([^!]+!)?([^!]+)$",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_194,
+    r"^.+!([^!]+!)([^!]+)$",
+    r"foo!bar!bas",
+    Some((0, 11)),
+    Some((4, 8)),
+    Some((8, 11))
+);
+mat!(
+    match_basic_195,
+    r"((foo)|(bar))!bas",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 3)),
+    None,
+    Some((0, 3))
+);
+mat!(
+    match_basic_196,
+    r"((foo)|(bar))!bas",
+    r"foo!bar!bas",
+    Some((4, 11)),
+    Some((4, 7)),
+    None,
+    Some((4, 7))
+);
+mat!(
+    match_basic_197,
+    r"((foo)|(bar))!bas",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 3)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_198,
+    r"((foo)|bar)!bas",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_199,
+    r"((foo)|bar)!bas",
+    r"foo!bar!bas",
+    Some((4, 11)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_200,
+    r"((foo)|bar)!bas",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 3)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_201,
+    r"(foo|(bar))!bas",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 3)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_202,
+    r"(foo|(bar))!bas",
+    r"foo!bar!bas",
+    Some((4, 11)),
+    Some((4, 7)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_203,
+    r"(foo|(bar))!bas",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_204,
+    r"(foo|bar)!bas",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_205,
+    r"(foo|bar)!bas",
+    r"foo!bar!bas",
+    Some((4, 11)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_206,
+    r"(foo|bar)!bas",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 3))
+);
+mat!(
+    match_basic_207,
+    r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
+    r"foo!bar!bas",
+    Some((0, 11)),
+    Some((0, 11)),
+    None,
+    None,
+    Some((4, 8)),
+    Some((8, 11))
+);
+mat!(
+    match_basic_208,
+    r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$",
+    r"bas",
+    Some((0, 3)),
+    None,
+    Some((0, 3))
+);
+mat!(
+    match_basic_209,
+    r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_210,
+    r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$",
+    r"foo!bar!bas",
+    Some((0, 11)),
+    None,
+    None,
+    Some((4, 8)),
+    Some((8, 11))
+);
+mat!(
+    match_basic_211,
+    r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_212,
+    r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
+    r"bas",
+    Some((0, 3)),
+    Some((0, 3)),
+    None,
+    Some((0, 3))
+);
+mat!(
+    match_basic_213,
+    r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
+    r"bar!bas",
+    Some((0, 7)),
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(
+    match_basic_214,
+    r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
+    r"foo!bar!bas",
+    Some((0, 11)),
+    Some((0, 11)),
+    None,
+    None,
+    Some((4, 8)),
+    Some((8, 11))
+);
+mat!(
+    match_basic_215,
+    r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
+    r"foo!bas",
+    Some((0, 7)),
+    Some((0, 7)),
+    Some((0, 4)),
+    Some((4, 7))
+);
+mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_218, r"\\XXX", r"\XXX", Some((0, 4)));
+mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_221, r"\\000", r"\000", Some((0, 4)));
+
+// Tests from nullsubexpr.dat
+mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None);
+mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0)));
+mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0)));
+mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_17, r"(a+)+", r"x", None);
+mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None);
+mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0)));
+mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None);
+mat!(match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(
+    match_nullsubexpr_34,
+    r"([^b]*)*",
+    r"aaaaaab",
+    Some((0, 6)),
+    Some((0, 6))
+);
+mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)));
+mat!(
+    match_nullsubexpr_41,
+    r"([ab]*)*",
+    r"aaaabcde",
+    Some((0, 5)),
+    Some((0, 5))
+);
+mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None);
+mat!(
+    match_nullsubexpr_46,
+    r"([^ab]*)*",
+    r"ccccxx",
+    Some((0, 6)),
+    Some((0, 6))
+);
+mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None);
+mat!(
+    match_nullsubexpr_50,
+    r"((z)+|a)*",
+    r"zabcde",
+    Some((0, 2)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_69,
+    r"(a*)*(x)",
+    r"x",
+    Some((0, 1)),
+    None,
+    Some((0, 1))
+);
+mat!(
+    match_nullsubexpr_70,
+    r"(a*)*(x)",
+    r"ax",
+    Some((0, 2)),
+    Some((0, 1)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_71,
+    r"(a*)*(x)",
+    r"axa",
+    Some((0, 2)),
+    Some((0, 1)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_73,
+    r"(a*)+(x)",
+    r"x",
+    Some((0, 1)),
+    Some((0, 0)),
+    Some((0, 1))
+);
+mat!(
+    match_nullsubexpr_74,
+    r"(a*)+(x)",
+    r"ax",
+    Some((0, 2)),
+    Some((0, 1)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_75,
+    r"(a*)+(x)",
+    r"axa",
+    Some((0, 2)),
+    Some((0, 1)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_77,
+    r"(a*){2}(x)",
+    r"x",
+    Some((0, 1)),
+    Some((0, 0)),
+    Some((0, 1))
+);
+mat!(
+    match_nullsubexpr_78,
+    r"(a*){2}(x)",
+    r"ax",
+    Some((0, 2)),
+    Some((1, 1)),
+    Some((1, 2))
+);
+mat!(
+    match_nullsubexpr_79,
+    r"(a*){2}(x)",
+    r"axa",
+    Some((0, 2)),
+    Some((1, 1)),
+    Some((1, 2))
+);
+
+// Tests from repetition.dat
+mat!(match_repetition_10, r"((..)|(.))", r"", None);
+mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None);
+mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None);
+mat!(match_repetition_14, r"((..)|(.)){1}", r"", None);
+mat!(match_repetition_15, r"((..)|(.)){2}", r"", None);
+mat!(match_repetition_16, r"((..)|(.)){3}", r"", None);
+mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0)));
+mat!(
+    match_repetition_20,
+    r"((..)|(.))",
+    r"a",
+    Some((0, 1)),
+    Some((0, 1)),
+    None,
+    Some((0, 1))
+);
+mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None);
+mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None);
+mat!(
+    match_repetition_24,
+    r"((..)|(.)){1}",
+    r"a",
+    Some((0, 1)),
+    Some((0, 1)),
+    None,
+    Some((0, 1))
+);
+mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None);
+mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None);
+mat!(
+    match_repetition_28,
+    r"((..)|(.))*",
+    r"a",
+    Some((0, 1)),
+    Some((0, 1)),
+    None,
+    Some((0, 1))
+);
+mat!(
+    match_repetition_30,
+    r"((..)|(.))",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_31,
+    r"((..)|(.))((..)|(.))",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 1)),
+    None,
+    Some((0, 1)),
+    Some((1, 2)),
+    None,
+    Some((1, 2))
+);
+mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None);
+mat!(
+    match_repetition_34,
+    r"((..)|(.)){1}",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_35,
+    r"((..)|(.)){2}",
+    r"aa",
+    Some((0, 2)),
+    Some((1, 2)),
+    None,
+    Some((1, 2))
+);
+mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None);
+mat!(
+    match_repetition_38,
+    r"((..)|(.))*",
+    r"aa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_40,
+    r"((..)|(.))",
+    r"aaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_41,
+    r"((..)|(.))((..)|(.))",
+    r"aaa",
+    Some((0, 3)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 3)),
+    None,
+    Some((2, 3))
+);
+mat!(
+    match_repetition_42,
+    r"((..)|(.))((..)|(.))((..)|(.))",
+    r"aaa",
+    Some((0, 3)),
+    Some((0, 1)),
+    None,
+    Some((0, 1)),
+    Some((1, 2)),
+    None,
+    Some((1, 2)),
+    Some((2, 3)),
+    None,
+    Some((2, 3))
+);
+mat!(
+    match_repetition_44,
+    r"((..)|(.)){1}",
+    r"aaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_46,
+    r"((..)|(.)){2}",
+    r"aaa",
+    Some((0, 3)),
+    Some((2, 3)),
+    Some((0, 2)),
+    Some((2, 3))
+);
+mat!(
+    match_repetition_47,
+    r"((..)|(.)){3}",
+    r"aaa",
+    Some((0, 3)),
+    Some((2, 3)),
+    None,
+    Some((2, 3))
+);
+mat!(
+    match_repetition_50,
+    r"((..)|(.))*",
+    r"aaa",
+    Some((0, 3)),
+    Some((2, 3)),
+    Some((0, 2)),
+    Some((2, 3))
+);
+mat!(
+    match_repetition_52,
+    r"((..)|(.))",
+    r"aaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_53,
+    r"((..)|(.))((..)|(.))",
+    r"aaaa",
+    Some((0, 4)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_54,
+    r"((..)|(.))((..)|(.))((..)|(.))",
+    r"aaaa",
+    Some((0, 4)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 3)),
+    None,
+    Some((2, 3)),
+    Some((3, 4)),
+    None,
+    Some((3, 4))
+);
+mat!(
+    match_repetition_56,
+    r"((..)|(.)){1}",
+    r"aaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_57,
+    r"((..)|(.)){2}",
+    r"aaaa",
+    Some((0, 4)),
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_59,
+    r"((..)|(.)){3}",
+    r"aaaa",
+    Some((0, 4)),
+    Some((3, 4)),
+    Some((0, 2)),
+    Some((3, 4))
+);
+mat!(
+    match_repetition_61,
+    r"((..)|(.))*",
+    r"aaaa",
+    Some((0, 4)),
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_63,
+    r"((..)|(.))",
+    r"aaaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_64,
+    r"((..)|(.))((..)|(.))",
+    r"aaaaa",
+    Some((0, 4)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_65,
+    r"((..)|(.))((..)|(.))((..)|(.))",
+    r"aaaaa",
+    Some((0, 5)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 4)),
+    Some((2, 4)),
+    None,
+    Some((4, 5)),
+    None,
+    Some((4, 5))
+);
+mat!(
+    match_repetition_67,
+    r"((..)|(.)){1}",
+    r"aaaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_68,
+    r"((..)|(.)){2}",
+    r"aaaaa",
+    Some((0, 4)),
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_70,
+    r"((..)|(.)){3}",
+    r"aaaaa",
+    Some((0, 5)),
+    Some((4, 5)),
+    Some((2, 4)),
+    Some((4, 5))
+);
+mat!(
+    match_repetition_73,
+    r"((..)|(.))*",
+    r"aaaaa",
+    Some((0, 5)),
+    Some((4, 5)),
+    Some((2, 4)),
+    Some((4, 5))
+);
+mat!(
+    match_repetition_75,
+    r"((..)|(.))",
+    r"aaaaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_76,
+    r"((..)|(.))((..)|(.))",
+    r"aaaaaa",
+    Some((0, 4)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_77,
+    r"((..)|(.))((..)|(.))((..)|(.))",
+    r"aaaaaa",
+    Some((0, 6)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None,
+    Some((2, 4)),
+    Some((2, 4)),
+    None,
+    Some((4, 6)),
+    Some((4, 6)),
+    None
+);
+mat!(
+    match_repetition_79,
+    r"((..)|(.)){1}",
+    r"aaaaaa",
+    Some((0, 2)),
+    Some((0, 2)),
+    Some((0, 2)),
+    None
+);
+mat!(
+    match_repetition_80,
+    r"((..)|(.)){2}",
+    r"aaaaaa",
+    Some((0, 4)),
+    Some((2, 4)),
+    Some((2, 4)),
+    None
+);
+mat!(
+    match_repetition_81,
+    r"((..)|(.)){3}",
+    r"aaaaaa",
+    Some((0, 6)),
+    Some((4, 6)),
+    Some((4, 6)),
+    None
+);
+mat!(
+    match_repetition_83,
+    r"((..)|(.))*",
+    r"aaaaaa",
+    Some((0, 6)),
+    Some((4, 6)),
+    Some((4, 6)),
+    None
+);
+mat!(
+    match_repetition_90,
+    r"X(.?){0,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_91,
+    r"X(.?){1,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_92,
+    r"X(.?){2,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_93,
+    r"X(.?){3,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_94,
+    r"X(.?){4,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_95,
+    r"X(.?){5,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_96,
+    r"X(.?){6,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_97,
+    r"X(.?){7,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((7, 8))
+);
+mat!(
+    match_repetition_98,
+    r"X(.?){8,}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_100,
+    r"X(.?){0,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_102,
+    r"X(.?){1,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_104,
+    r"X(.?){2,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_106,
+    r"X(.?){3,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_108,
+    r"X(.?){4,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_110,
+    r"X(.?){5,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_112,
+    r"X(.?){6,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_114,
+    r"X(.?){7,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_115,
+    r"X(.?){8,8}Y",
+    r"X1234567Y",
+    Some((0, 9)),
+    Some((8, 8))
+);
+mat!(
+    match_repetition_126,
+    r"(a|ab|c|bcd){0,}(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_127,
+    r"(a|ab|c|bcd){1,}(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_128,
+    r"(a|ab|c|bcd){2,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((3, 6)),
+    Some((6, 6))
+);
+mat!(
+    match_repetition_129,
+    r"(a|ab|c|bcd){3,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((3, 6)),
+    Some((6, 6))
+);
+mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None);
+mat!(
+    match_repetition_131,
+    r"(a|ab|c|bcd){0,10}(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_132,
+    r"(a|ab|c|bcd){1,10}(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_133,
+    r"(a|ab|c|bcd){2,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((3, 6)),
+    Some((6, 6))
+);
+mat!(
+    match_repetition_134,
+    r"(a|ab|c|bcd){3,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((3, 6)),
+    Some((6, 6))
+);
+mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None);
+mat!(
+    match_repetition_136,
+    r"(a|ab|c|bcd)*(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_137,
+    r"(a|ab|c|bcd)+(d*)",
+    r"ababcd",
+    Some((0, 1)),
+    Some((0, 1)),
+    Some((1, 1))
+);
+mat!(
+    match_repetition_143,
+    r"(ab|a|c|bcd){0,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_145,
+    r"(ab|a|c|bcd){1,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_147,
+    r"(ab|a|c|bcd){2,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_149,
+    r"(ab|a|c|bcd){3,}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None);
+mat!(
+    match_repetition_152,
+    r"(ab|a|c|bcd){0,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_154,
+    r"(ab|a|c|bcd){1,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_156,
+    r"(ab|a|c|bcd){2,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_158,
+    r"(ab|a|c|bcd){3,10}(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None);
+mat!(
+    match_repetition_161,
+    r"(ab|a|c|bcd)*(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
+mat!(
+    match_repetition_163,
+    r"(ab|a|c|bcd)+(d*)",
+    r"ababcd",
+    Some((0, 6)),
+    Some((4, 5)),
+    Some((5, 6))
+);
diff --git a/tests/macros.rs b/tests/macros.rs
new file mode 100644
index 0000000..e70e948
--- /dev/null
+++ b/tests/macros.rs
@@ -0,0 +1,160 @@
+// Convenience macros.
+
+macro_rules! findall {
+    ($re:expr, $text:expr) => {{
+        $re.find_iter(text!($text))
+           .map(|m| (m.start(), m.end())).collect::<Vec<_>>()
+    }}
+}
+
+// Macros for automatically producing tests.
+
+macro_rules! ismatch {
+    ($name:ident, $re:expr, $text:expr, $ismatch:expr) => {
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            assert_eq!($ismatch, re.is_match(text!($text)));
+        }
+    };
+}
+
+macro_rules! mat(
+    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let expected: Vec<Option<_>> = vec![$($loc)+];
+            let r = regex!($re);
+            let got: Vec<Option<_>> = match r.captures(text) {
+                Some(c) => {
+                    assert!(r.is_match(text));
+                    assert!(r.shortest_match(text).is_some());
+                    r.capture_names()
+                     .enumerate()
+                     .map(|(i, _)| c.get(i).map(|m| (m.start(), m.end())))
+                     .collect()
+                }
+                None => vec![None],
+            };
+            // The test set sometimes leaves out capture groups, so truncate
+            // the actual capture groups to match the test set.
+            let mut sgot = &got[..];
+            if sgot.len() > expected.len() {
+                sgot = &sgot[0..expected.len()]
+            }
+            if expected != sgot {
+                panic!("For RE '{}' against '{:?}', \
+                        expected '{:?}' but got '{:?}'",
+                       $re, text, expected, sgot);
+            }
+        }
+    );
+);
+
+macro_rules! matiter(
+    ($name:ident, $re:expr, $text:expr) => (
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let expected: Vec<(usize, usize)> = vec![];
+            let r = regex!($re);
+            let got: Vec<_> =
+                r.find_iter(text).map(|m| (m.start(), m.end())).collect();
+            if expected != got {
+                panic!("For RE '{}' against '{:?}', \
+                        expected '{:?}' but got '{:?}'",
+                       $re, text, expected, got);
+            }
+            let captures_got: Vec<_> =
+                r.captures_iter(text)
+                 .map(|c| c.get(0).unwrap())
+                 .map(|m| (m.start(), m.end()))
+                 .collect();
+            if captures_got != got {
+                panic!("For RE '{}' against '{:?}', \
+                        got '{:?}' using find_iter but got '{:?}' \
+                        using captures_iter",
+                       $re, text, got, captures_got);
+            }
+        }
+    );
+    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let expected: Vec<_> = vec![$($loc)+];
+            let r = regex!($re);
+            let got: Vec<_> =
+                r.find_iter(text).map(|m| (m.start(), m.end())).collect();
+            if expected != got {
+                panic!("For RE '{}' against '{:?}', \
+                        expected '{:?}' but got '{:?}'",
+                       $re, text, expected, got);
+            }
+            let captures_got: Vec<_> =
+                r.captures_iter(text)
+                 .map(|c| c.get(0).unwrap())
+                 .map(|m| (m.start(), m.end()))
+                 .collect();
+            if captures_got != got {
+                panic!("For RE '{}' against '{:?}', \
+                        got '{:?}' using find_iter but got '{:?}' \
+                        using captures_iter",
+                       $re, text, got, captures_got);
+            }
+        }
+    );
+);
+
+macro_rules! matset {
+    ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => {
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let set = regex_set!($res);
+            assert!(set.is_match(text));
+            let expected = vec![$($match_index),*];
+            let matches = set.matches(text);
+            assert!(matches.matched_any());
+            let got: Vec<_> = matches.into_iter().collect();
+            assert_eq!(expected, got);
+        }
+    }
+}
+
+macro_rules! nomatset {
+    ($name:ident, $res:expr, $text:expr) => {
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let set = regex_set!($res);
+            assert!(!set.is_match(text));
+            let matches = set.matches(text);
+            assert!(!matches.matched_any());
+            assert_eq!(0, matches.into_iter().count());
+        }
+    }
+}
+
+macro_rules! split {
+    ($name:ident, $re:expr, $text:expr, $expected:expr) => {
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            let splitted: Vec<_> = re.split(t!($text)).collect();
+            assert_eq!($expected, &*splitted);
+        }
+    }
+}
+
+macro_rules! splitn {
+    ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => {
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            let splitted: Vec<_> = re.splitn(t!($text), $limit).collect();
+            assert_eq!($expected, &*splitted);
+        }
+    }
+}
diff --git a/tests/macros_bytes.rs b/tests/macros_bytes.rs
new file mode 100644
index 0000000..03c370d
--- /dev/null
+++ b/tests/macros_bytes.rs
@@ -0,0 +1,40 @@
+// Macros for use in writing tests generic over &str/&[u8].
+macro_rules! text { ($text:expr) => { $text.as_bytes() } }
+macro_rules! t { ($re:expr) => { text!($re) } }
+macro_rules! match_text { ($text:expr) => { $text.as_bytes() } }
+macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } }
+macro_rules! empty_vec { () => { <Vec<&[u8]>>::new() } }
+
+macro_rules! bytes { ($text:expr) => { $text } }
+
+macro_rules! no_expand {
+    ($text:expr) => {{
+        use regex::bytes::NoExpand;
+        NoExpand(text!($text))
+    }}
+}
+
+macro_rules! show {
+    ($text:expr) => {{
+        use std::ascii::escape_default;
+        let mut s = vec![];
+        for &b in bytes!($text) {
+            s.extend(escape_default(b));
+        }
+        String::from_utf8(s).unwrap()
+    }}
+}
+
+macro_rules! expand {
+    ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => {
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            let cap = re.captures(t!($text)).unwrap();
+
+            let mut got = vec![];
+            cap.expand(t!($expand), &mut got);
+            assert_eq!(show!(t!($expected)), show!(&*got));
+        }
+    }
+}
diff --git a/tests/macros_str.rs b/tests/macros_str.rs
new file mode 100644
index 0000000..9b996b3
--- /dev/null
+++ b/tests/macros_str.rs
@@ -0,0 +1,37 @@
+// Macros for use in writing tests generic over &str/&[u8].
+macro_rules! text { ($text:expr) => { $text } }
+macro_rules! t { ($text:expr) => { text!($text) } }
+macro_rules! match_text { ($text:expr) => { $text.as_str() } }
+macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } }
+macro_rules! empty_vec { () => { <Vec<&str>>::new() } }
+
+macro_rules! no_expand {
+    ($text:expr) => {{
+        use regex::NoExpand;
+        NoExpand(text!($text))
+    }}
+}
+
+macro_rules! show { ($text:expr) => { $text } }
+
+// N.B. The expansion APIs for &str and &[u8] differ slightly for now, but
+// they should be unified in 1.0. Then we can move this macro back into
+// tests/api.rs, where it is used. ---AG
+macro_rules! expand {
+    ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => {
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            let cap = re.captures(t!($text)).unwrap();
+
+            let mut got = String::new();
+            cap.expand(t!($expand), &mut got);
+            assert_eq!(show!(t!($expected)), show!(&*got));
+        }
+    }
+}
+
+#[cfg(feature = "pattern")]
+macro_rules! searcher_expr { ($e:expr) => ($e) }
+#[cfg(not(feature = "pattern"))]
+macro_rules! searcher_expr { ($e:expr) => ({}) }
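+
+// A hedged sketch of how a concrete test harness is assumed to wire these
+// generic macros up (the file name and `regex!`/`regex_new!` definitions
+// below are illustrative, not a verbatim copy of the crate's own harness):
+// a top-level integration test defines the compile macros, pulls in exactly
+// one of `macros_str.rs` or `macros_bytes.rs`, then includes the shared
+// macros and test modules. Shown as comments so this file stays inert.
+//
+//     // tests/test_default.rs (illustrative)
+//     macro_rules! regex_new {
+//         ($re:expr) => {{
+//             use regex::Regex;
+//             Regex::new($re)
+//         }};
+//     }
+//     macro_rules! regex {
+//         ($re:expr) => {
+//             regex_new!($re).unwrap()
+//         };
+//     }
+//     include!("macros_str.rs"); // the &[u8] harness pulls in macros_bytes.rs
+//     include!("macros.rs");
+//     mod crazy;
+//     mod flags;
+//     mod fowler;
+//     mod multiline;
+//     mod noparse;
+//     mod regression;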
diff --git a/tests/misc.rs b/tests/misc.rs
new file mode 100644
index 0000000..314811e
--- /dev/null
+++ b/tests/misc.rs
@@ -0,0 +1,4 @@
+mat!(prefix_literal_match, r"^abc", r"abc", Some((0, 3)));
+mat!(prefix_literal_nomatch, r"^abc", r"zabc", None);
+mat!(one_literal_edge, r"abc", r"xxxxxab", None);
+matiter!(terminates, r"a$", r"a", (0, 1));
diff --git a/tests/multiline.rs b/tests/multiline.rs
new file mode 100644
index 0000000..62ee47b
--- /dev/null
+++ b/tests/multiline.rs
@@ -0,0 +1,144 @@
+matiter!(
+    match_multi_1,
+    r"(?m)^[a-z]+$",
+    "abc\ndef\nxyz",
+    (0, 3),
+    (4, 7),
+    (8, 11)
+);
+matiter!(match_multi_2, r"(?m)^$", "abc\ndef\nxyz");
+matiter!(match_multi_3, r"(?m)^", "abc\ndef\nxyz", (0, 0), (4, 4), (8, 8));
+matiter!(match_multi_4, r"(?m)$", "abc\ndef\nxyz", (3, 3), (7, 7), (11, 11));
+matiter!(
+    match_multi_5,
+    r"(?m)^[a-z]",
+    "abc\ndef\nxyz",
+    (0, 1),
+    (4, 5),
+    (8, 9)
+);
+matiter!(match_multi_6, r"(?m)[a-z]^", "abc\ndef\nxyz");
+matiter!(
+    match_multi_7,
+    r"(?m)[a-z]$",
+    "abc\ndef\nxyz",
+    (2, 3),
+    (6, 7),
+    (10, 11)
+);
+matiter!(match_multi_8, r"(?m)$[a-z]", "abc\ndef\nxyz");
+matiter!(match_multi_9, r"(?m)^$", "", (0, 0));
+
+matiter!(
+    match_multi_rep_1,
+    r"(?m)(?:^$)*",
+    "a\nb\nc",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4),
+    (5, 5)
+);
+matiter!(
+    match_multi_rep_2,
+    r"(?m)(?:^|a)+",
+    "a\naaa\n",
+    (0, 0),
+    (2, 2),
+    (3, 5),
+    (6, 6)
+);
+matiter!(
+    match_multi_rep_3,
+    r"(?m)(?:^|a)*",
+    "a\naaa\n",
+    (0, 1),
+    (2, 5),
+    (6, 6)
+);
+matiter!(
+    match_multi_rep_4,
+    r"(?m)(?:^[a-z])+",
+    "abc\ndef\nxyz",
+    (0, 1),
+    (4, 5),
+    (8, 9)
+);
+matiter!(
+    match_multi_rep_5,
+    r"(?m)(?:^[a-z]{3}\n?)+",
+    "abc\ndef\nxyz",
+    (0, 11)
+);
+matiter!(
+    match_multi_rep_6,
+    r"(?m)(?:^[a-z]{3}\n?)*",
+    "abc\ndef\nxyz",
+    (0, 11)
+);
+matiter!(
+    match_multi_rep_7,
+    r"(?m)(?:\n?[a-z]{3}$)+",
+    "abc\ndef\nxyz",
+    (0, 11)
+);
+matiter!(
+    match_multi_rep_8,
+    r"(?m)(?:\n?[a-z]{3}$)*",
+    "abc\ndef\nxyz",
+    (0, 11)
+);
+matiter!(
+    match_multi_rep_9,
+    r"(?m)^*",
+    "\naa\n",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4)
+);
+matiter!(match_multi_rep_10, r"(?m)^+", "\naa\n", (0, 0), (1, 1), (4, 4));
+matiter!(
+    match_multi_rep_11,
+    r"(?m)$*",
+    "\naa\n",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4)
+);
+matiter!(match_multi_rep_12, r"(?m)$+", "\naa\n", (0, 0), (3, 3), (4, 4));
+matiter!(match_multi_rep_13, r"(?m)(?:$\n)+", "\n\naaa\n\n", (0, 2), (5, 7));
+matiter!(
+    match_multi_rep_14,
+    r"(?m)(?:$\n)*",
+    "\n\naaa\n\n",
+    (0, 2),
+    (3, 3),
+    (4, 4),
+    (5, 7)
+);
+matiter!(match_multi_rep_15, r"(?m)(?:$\n^)+", "\n\naaa\n\n", (0, 2), (5, 7));
+matiter!(
+    match_multi_rep_16,
+    r"(?m)(?:^|$)+",
+    "\n\naaa\n\n",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (5, 5),
+    (6, 6),
+    (7, 7)
+);
+matiter!(
+    match_multi_rep_17,
+    r"(?m)(?:$\n)*",
+    "\n\naaa\n\n",
+    (0, 2),
+    (3, 3),
+    (4, 4),
+    (5, 7)
+);
diff --git a/tests/noparse.rs b/tests/noparse.rs
new file mode 100644
index 0000000..62eb5be
--- /dev/null
+++ b/tests/noparse.rs
@@ -0,0 +1,50 @@
+macro_rules! noparse(
+    ($name:ident, $re:expr) => (
+        #[test]
+        fn $name() {
+            let re = $re;
+            match regex_new!(re) {
+                Err(_) => {},
+                Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
+            }
+        }
+    );
+);
+
+noparse!(fail_no_repeat_arg, "*");
+noparse!(fail_incomplete_escape, "\\");
+noparse!(fail_class_incomplete, "[A-");
+noparse!(fail_class_not_closed, "[A");
+noparse!(fail_class_no_begin, r"[\A]");
+noparse!(fail_class_no_end, r"[\z]");
+noparse!(fail_class_no_boundary, r"[\b]");
+noparse!(fail_open_paren, "(");
+noparse!(fail_close_paren, ")");
+noparse!(fail_invalid_range, "[a-Z]");
+noparse!(fail_empty_capture_name, "(?P<>a)");
+noparse!(fail_bad_capture_name, "(?P<na-me>)");
+noparse!(fail_bad_flag, "(?a)a");
+noparse!(fail_too_big, "a{10000000}");
+noparse!(fail_counted_no_close, "a{1001");
+noparse!(fail_unfinished_cap, "(?");
+noparse!(fail_unfinished_escape, "\\");
+noparse!(fail_octal_digit, r"\8");
+noparse!(fail_hex_digit, r"\xG0");
+noparse!(fail_hex_short, r"\xF");
+noparse!(fail_hex_long_digits, r"\x{fffg}");
+noparse!(fail_flag_bad, "(?a)");
+noparse!(fail_flag_empty, "(?)");
+noparse!(fail_double_neg, "(?-i-i)");
+noparse!(fail_neg_empty, "(?i-)");
+noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)");
+noparse!(fail_range_end_no_class, "[a-[:lower:]]");
+noparse!(fail_range_end_no_begin, r"[a-\A]");
+noparse!(fail_range_end_no_end, r"[a-\z]");
+noparse!(fail_range_end_no_boundary, r"[a-\b]");
+noparse!(fail_empty_alt1, r"|z");
+noparse!(fail_empty_alt2, r"z|");
+noparse!(fail_empty_alt3, r"|");
+noparse!(fail_empty_alt4, r"||");
+noparse!(fail_empty_alt5, r"()|z");
+noparse!(fail_empty_alt6, r"z|()");
+noparse!(fail_empty_alt7, r"(|)");
diff --git a/tests/regression.rs b/tests/regression.rs
new file mode 100644
index 0000000..686fe35
--- /dev/null
+++ b/tests/regression.rs
@@ -0,0 +1,212 @@
+// See: https://github.com/rust-lang/regex/issues/48
+#[test]
+fn invalid_regexes_no_crash() {
+    assert!(regex_new!("(*)").is_err());
+    assert!(regex_new!("(?:?)").is_err());
+    assert!(regex_new!("(?)").is_err());
+    assert!(regex_new!("*").is_err());
+}
+
+// See: https://github.com/rust-lang/regex/issues/98
+#[test]
+fn regression_many_repeat_stack_overflow() {
+    let re = regex!("^.{1,2500}");
+    assert_eq!(vec![(0, 1)], findall!(re, "a"));
+}
+
+// See: https://github.com/rust-lang/regex/issues/555
+#[test]
+fn regression_invalid_repetition_expr() {
+    assert!(regex_new!("(?m){1,1}").is_err());
+}
+
+// See: https://github.com/rust-lang/regex/issues/527
+#[test]
+fn regression_invalid_flags_expression() {
+    assert!(regex_new!("(((?x)))").is_ok());
+}
+
+// See: https://github.com/rust-lang/regex/issues/75
+mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2)));
+mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2)));
+
+// See: https://github.com/rust-lang/regex/issues/99
+#[cfg(feature = "unicode-case")]
+mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None);
+#[cfg(feature = "unicode-case")]
+mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None);
+
+// See: https://github.com/rust-lang/regex/issues/101
+mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1)));
+
+// See: https://github.com/rust-lang/regex/issues/129
+#[test]
+fn regression_captures_rep() {
+    let re = regex!(r"([a-f]){2}(?P<foo>[x-z])");
+    let caps = re.captures(text!("abx")).unwrap();
+    assert_eq!(match_text!(caps.name("foo").unwrap()), text!("x"));
+}
+
+// See: https://github.com/rust-lang/regex/issues/153
+mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1)));
+mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3)));
+
+// See: https://github.com/rust-lang/regex/issues/169
+mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3)));
+
+// See: https://github.com/rust-lang/regex/issues/76
+#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))]
+mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10)));
+
+// See: https://github.com/rust-lang/regex/issues/191
+mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3)));
+
+// burntsushi was bad and didn't create an issue for this bug.
+mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None);
+mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None);
+mat!(anchored_prefix3, r"^-[a-z]", "r-f", None);
+
+// See: https://github.com/rust-lang/regex/issues/204
+#[cfg(feature = "unicode-perl")]
+split!(
+    split_on_word_boundary,
+    r"\b",
+    r"Should this (work?)",
+    &[
+        t!(""),
+        t!("Should"),
+        t!(" "),
+        t!("this"),
+        t!(" ("),
+        t!("work"),
+        t!("?)")
+    ]
+);
+#[cfg(feature = "unicode-perl")]
+matiter!(
+    word_boundary_dfa,
+    r"\b",
+    "a b c",
+    (0, 0),
+    (1, 1),
+    (2, 2),
+    (3, 3),
+    (4, 4),
+    (5, 5)
+);
+
+// See: https://github.com/rust-lang/regex/issues/268
+matiter!(partial_anchor, r"^a|b", "ba", (0, 1));
+
+// See: https://github.com/rust-lang/regex/issues/280
+ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false);
+ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false);
+
+// See: https://github.com/rust-lang/regex/issues/289
+mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4)));
+
+// See: https://github.com/rust-lang/regex/issues/291
+mat!(
+    lits_unambiguous2,
+    r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$",
+    "CIMG2341",
+    Some((0, 8)),
+    Some((0, 4)),
+    None,
+    Some((0, 4)),
+    Some((4, 8))
+);
+
+// See: https://github.com/rust-lang/regex/issues/271
+mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
+mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
+mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
+#[cfg(feature = "unicode-perl")]
+mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
+
+// See: https://github.com/rust-lang/regex/issues/321
+ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
+ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1203
+ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true);
+ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true);
+matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10));
+
+// See: https://github.com/rust-lang/regex/issues/334
+// See: https://github.com/rust-lang/regex/issues/557
+mat!(
+    captures_after_dfa_premature_end1,
+    r"a(b*(X|$))?",
+    "abcbX",
+    Some((0, 1)),
+    None,
+    None
+);
+mat!(
+    captures_after_dfa_premature_end2,
+    r"a(bc*(X|$))?",
+    "abcbX",
+    Some((0, 1)),
+    None,
+    None
+);
+mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0)));
+
+// See: https://github.com/rust-lang/regex/issues/437
+ismatch!(
+    literal_panic,
+    r"typename type\-parameter\-[0-9]+\-[0-9]+::.+",
+    "test",
+    false
+);
+
+// See: https://github.com/rust-lang/regex/issues/533
+ismatch!(
+    blank_matches_nothing_between_space_and_tab,
+    r"[[:blank:]]",
+    "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
+     \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
+     \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
+    false
+);
+
+ismatch!(
+    inverted_blank_matches_everything_between_space_and_tab,
+    r"^[[:^blank:]]+$",
+    "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
+     \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
+     \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
+    true
+);
+
+// Tests that our Aho-Corasick optimization works correctly. It only
+// kicks in when we have >32 literals. By "works correctly," we mean that
+// leftmost-first match semantics are properly respected. That is, samwise
+// should match, not sam.
+mat!(
+    ahocorasick1,
+    "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\
+     A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z",
+    "samwise",
+    Some((0, 7))
+);
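+//
+// For context, a hedged sketch (not part of the suite) of the leftmost-first
+// behavior the test above relies on, using the public API:
+//
+//     let re = regex::Regex::new("samwise|sam").unwrap();
+//     // The earlier alternate is preferred, so the whole word matches.
+//     assert_eq!(re.find("samwise").unwrap().as_str(), "samwise");
+//
+// The test above pads the alternation with single-letter alternates so that
+// more than 32 literals are present and the same semantics are exercised on
+// the Aho-Corasick literal path.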
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1247
+#[test]
+#[cfg(feature = "unicode-perl")]
+fn regression_nfa_stops1() {
+    let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
+    assert_eq!(0, re.find_iter(b"s\xE4").count());
+}
+
+// See: https://github.com/rust-lang/regex/issues/640
+#[cfg(feature = "unicode-case")]
+matiter!(
+    flags_are_unset,
+    r"((?i)foo)|Bar",
+    "foo Foo bar Bar",
+    (0, 3),
+    (4, 7),
+    (12, 15)
+);
diff --git a/tests/replace.rs b/tests/replace.rs
new file mode 100644
index 0000000..c156a39
--- /dev/null
+++ b/tests/replace.rs
@@ -0,0 +1,132 @@
+macro_rules! replace(
+    ($name:ident, $which:ident, $re:expr,
+     $search:expr, $replace:expr, $result:expr) => (
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            assert_eq!(re.$which(text!($search), $replace), text!($result));
+        }
+    );
+);
+
+replace!(first, replace, r"[0-9]", "age: 26", t!("Z"), "age: Z6");
+replace!(plus, replace, r"[0-9]+", "age: 26", t!("Z"), "age: Z");
+replace!(all, replace_all, r"[0-9]", "age: 26", t!("Z"), "age: ZZ");
+replace!(
+    groups,
+    replace,
+    r"(?-u)(\S+)\s+(\S+)",
+    "w1 w2",
+    t!("$2 $1"),
+    "w2 w1"
+);
+replace!(
+    double_dollar,
+    replace,
+    r"(?-u)(\S+)\s+(\S+)",
+    "w1 w2",
+    t!("$2 $$1"),
+    "w2 $1"
+);
+// replace!(adjacent_index, replace,
+// r"([^aeiouy])ies$", "skies", t!("$1y"), "sky");
+replace!(
+    named,
+    replace_all,
+    r"(?-u)(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
+    "w1 w2 w3 w4",
+    t!("$last $first$space"),
+    "w2 w1 w4 w3"
+);
+replace!(
+    trim,
+    replace_all,
+    "^[ \t]+|[ \t]+$",
+    " \t  trim me\t   \t",
+    t!(""),
+    "trim me"
+);
+replace!(number_hypen, replace, r"(.)(.)", "ab", t!("$1-$2"), "a-b");
+// replace!(number_underscore, replace, r"(.)(.)", "ab", t!("$1_$2"), "a_b");
+replace!(
+    simple_expand,
+    replace_all,
+    r"(?-u)(\w) (\w)",
+    "a b",
+    t!("$2 $1"),
+    "b a"
+);
+replace!(
+    literal_dollar1,
+    replace_all,
+    r"(?-u)(\w+) (\w+)",
+    "a b",
+    t!("$$1"),
+    "$1"
+);
+replace!(
+    literal_dollar2,
+    replace_all,
+    r"(?-u)(\w+) (\w+)",
+    "a b",
+    t!("$2 $$c $1"),
+    "b $c a"
+);
+replace!(
+    no_expand1,
+    replace,
+    r"(?-u)(\S+)\s+(\S+)",
+    "w1 w2",
+    no_expand!("$2 $1"),
+    "$2 $1"
+);
+replace!(
+    no_expand2,
+    replace,
+    r"(?-u)(\S+)\s+(\S+)",
+    "w1 w2",
+    no_expand!("$$1"),
+    "$$1"
+);
+use_!(Captures);
+replace!(
+    closure_returning_reference,
+    replace,
+    r"([0-9]+)",
+    "age: 26",
+    |captures: &Captures| {
+        match_text!(captures.get(1).unwrap())[0..1].to_owned()
+    },
+    "age: 2"
+);
+replace!(
+    closure_returning_value,
+    replace,
+    r"[0-9]+",
+    "age: 26",
+    |_captures: &Captures| t!("Z").to_owned(),
+    "age: Z"
+);
+
+// See https://github.com/rust-lang/regex/issues/314
+replace!(
+    match_at_start_replace_with_empty,
+    replace_all,
+    r"foo",
+    "foobar",
+    t!(""),
+    "bar"
+);
+
+// See https://github.com/rust-lang/regex/issues/393
+replace!(single_empty_match, replace, r"^", "bar", t!("foo"), "foobar");
+
+// See https://github.com/rust-lang/regex/issues/399
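+//
+// In the replacement `"${1}a $1a"`, `$1a` is parsed as a reference to a group
+// named "1a"; no such group exists, so it expands to the empty string, while
+// `${1}a` unambiguously means group 1 followed by a literal `a`. Hence
+// replacing in "b" yields "ba " rather than "ba ba".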
+replace!(
+    capture_longest_possible_name,
+    replace_all,
+    r"(.)",
+    "b",
+    t!("${1}a $1a"),
+    "ba "
+);
diff --git a/tests/searcher.rs b/tests/searcher.rs
new file mode 100644
index 0000000..3779f54
--- /dev/null
+++ b/tests/searcher.rs
@@ -0,0 +1,95 @@
+macro_rules! searcher {
+    ($name:ident, $re:expr, $haystack:expr) => (
+        searcher!($name, $re, $haystack, vec vec![]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => (
+        #[test]
+        #[allow(unused_imports)]
+        fn $name() {
+            searcher_expr! {{
+                use std::str::pattern::{Pattern, Searcher};
+                use std::str::pattern::SearchStep::{Match, Reject, Done};
+                let re = regex!($re);
+                let mut se = re.into_searcher($haystack);
+                let mut got_steps = vec![];
+                loop {
+                    match se.next() {
+                        Done => break,
+                        step => { got_steps.push(step); }
+                    }
+                }
+                assert_eq!(got_steps, $expect_steps);
+            }}
+        }
+    );
+}
+
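+// These searcher tests only do anything when built with the "pattern" cargo
+// feature (nightly only): without it, `searcher_expr!` (defined alongside the
+// other shared test macros) expands each test body to an empty block.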
+searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0));
+searcher!(
+    searcher_empty_regex,
+    r"",
+    "ab",
+    Match(0, 0),
+    Reject(0, 1),
+    Match(1, 1),
+    Reject(1, 2),
+    Match(2, 2)
+);
+searcher!(searcher_empty_haystack, r"\d", "");
+searcher!(searcher_one_match, r"\d", "5", Match(0, 1));
+searcher!(searcher_no_match, r"\d", "a", Reject(0, 1));
+searcher!(
+    searcher_two_adjacent_matches,
+    r"\d",
+    "56",
+    Match(0, 1),
+    Match(1, 2)
+);
+searcher!(
+    searcher_two_non_adjacent_matches,
+    r"\d",
+    "5a6",
+    Match(0, 1),
+    Reject(1, 2),
+    Match(2, 3)
+);
+searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2));
+searcher!(
+    searcher_one_zero_length_matches,
+    r"\d*",
+    "a1b2",
+    Match(0, 0),  // ^
+    Reject(0, 1), // a
+    Match(1, 2),  // a1
+    Reject(2, 3), // a1b
+    Match(3, 4),  // a1b2
+);
+searcher!(
+    searcher_many_zero_length_matches,
+    r"\d*",
+    "a1bbb2",
+    Match(0, 0),  // ^
+    Reject(0, 1), // a
+    Match(1, 2),  // a1
+    Reject(2, 3), // a1b
+    Match(3, 3),  // a1bb
+    Reject(3, 4), // a1bb
+    Match(4, 4),  // a1bbb
+    Reject(4, 5), // a1bbb
+    Match(5, 6),  // a1bbb2
+);
+searcher!(
+    searcher_unicode,
+    r".+?",
+    "Ⅰ1Ⅱ2",
+    Match(0, 3),
+    Match(3, 4),
+    Match(4, 7),
+    Match(7, 8)
+);
diff --git a/tests/set.rs b/tests/set.rs
new file mode 100644
index 0000000..3e9755c
--- /dev/null
+++ b/tests/set.rs
@@ -0,0 +1,45 @@
+matset!(set1, &["a", "a"], "a", 0, 1);
+matset!(set2, &["a", "a"], "ba", 0, 1);
+matset!(set3, &["a", "b"], "a", 0);
+matset!(set4, &["a", "b"], "b", 1);
+matset!(set5, &["a|b", "b|a"], "b", 0, 1);
+matset!(set6, &["foo", "oo"], "foo", 0, 1);
+matset!(set7, &["^foo", "bar$"], "foo", 0);
+matset!(set8, &["^foo", "bar$"], "foo bar", 0, 1);
+matset!(set9, &["^foo", "bar$"], "bar", 1);
+matset!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1);
+matset!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1);
+matset!(set12, &[r".*?", "a"], "zzzzzza", 0, 1);
+matset!(set13, &[r".*", "a"], "zzzzzza", 0, 1);
+matset!(set14, &[r".*", "a"], "zzzzzz", 0);
+matset!(set15, &[r"(?-u)\ba\b"], "hello a bye", 0);
+matset!(set16, &["a"], "a", 0);
+matset!(set17, &[".*a"], "a", 0);
+matset!(set18, &["a", "β"], "β", 1);
+
+nomatset!(nset1, &["a", "a"], "b");
+nomatset!(nset2, &["^foo", "bar$"], "bar foo");
+nomatset!(
+    nset3,
+    {
+        let xs: &[&str] = &[];
+        xs
+    },
+    "a"
+);
+nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted");
+
+// See: https://github.com/rust-lang/regex/issues/187
+#[test]
+fn regression_subsequent_matches() {
+    let set = regex_set!(&["ab", "b"]);
+    let text = text!("ba");
+    assert!(set.matches(text).matched(1));
+    assert!(set.matches(text).matched(1));
+}
+
+#[test]
+fn get_set_patterns() {
+    let set = regex_set!(&["a", "b"]);
+    assert_eq!(vec!["a", "b"], set.patterns());
+}
diff --git a/tests/shortest_match.rs b/tests/shortest_match.rs
new file mode 100644
index 0000000..f8b4fed
--- /dev/null
+++ b/tests/shortest_match.rs
@@ -0,0 +1,14 @@
+macro_rules! shortmat {
+    ($name:ident, $re:expr, $text:expr, $shortest_match:expr) => {
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let re = regex!($re);
+            assert_eq!($shortest_match, re.shortest_match(text));
+        }
+    };
+}
+
+shortmat!(t01, r"a+", r"aa", Some(1));
+// Test that the reverse suffix optimization gets it right.
+shortmat!(t02, r".*(?:abcd)+", r"abcdabcd", Some(4));
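+//
+// Note: `shortest_match` reports only the end offset of the shortest match,
+// so t01 yields Some(1) (a single `a`) and t02 must yield Some(4), the end of
+// the first "abcd", even though the "abcd" literal also occurs at the end of
+// the haystack.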
diff --git a/tests/suffix_reverse.rs b/tests/suffix_reverse.rs
new file mode 100644
index 0000000..774c9e8
--- /dev/null
+++ b/tests/suffix_reverse.rs
@@ -0,0 +1,6 @@
+mat!(t01, r".*abcd", r"abcd", Some((0, 4)));
+mat!(t02, r".*(?:abcd)+", r"abcd", Some((0, 4)));
+mat!(t03, r".*(?:abcd)+", r"abcdabcd", Some((0, 8)));
+mat!(t04, r".*(?:abcd)+", r"abcdxabcd", Some((0, 9)));
+mat!(t05, r".*x(?:abcd)+", r"abcdxabcd", Some((0, 9)));
+mat!(t06, r"[^abcd]*x(?:abcd)+", r"abcdxabcd", Some((4, 9)));
diff --git a/tests/test_backtrack.rs b/tests/test_backtrack.rs
new file mode 100644
index 0000000..617185f
--- /dev/null
+++ b/tests/test_backtrack.rs
@@ -0,0 +1,59 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking()
+            .build()
+            .map(|e| e.into_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .build()
+            .map(|e| e.into_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/test_backtrack_bytes.rs b/tests/test_backtrack_bytes.rs
new file mode 100644
index 0000000..17df4d8
--- /dev/null
+++ b/tests/test_backtrack_bytes.rs
@@ -0,0 +1,58 @@
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_bytes.rs");
+include!("macros.rs");
+
+mod api;
+mod bytes;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_ascii;
diff --git a/tests/test_backtrack_utf8bytes.rs b/tests/test_backtrack_utf8bytes.rs
new file mode 100644
index 0000000..78a0135
--- /dev/null
+++ b/tests/test_backtrack_utf8bytes.rs
@@ -0,0 +1,61 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|e| e.into_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|e| e.into_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/test_crates_regex.rs b/tests/test_crates_regex.rs
new file mode 100644
index 0000000..d697683
--- /dev/null
+++ b/tests/test_crates_regex.rs
@@ -0,0 +1,57 @@
+extern crate quickcheck;
+extern crate regex;
+
+/*
+ * This test is a minimal version of <rofl_0> and <subdiff_0>
+ *
+ * Once this bug gets fixed, uncomment rofl_0 and subdiff_0
+ * (in `tests/crates_regex.rs`).
+#[test]
+fn word_boundary_backtracking_default_mismatch() {
+    use regex::internal::ExecBuilder;
+
+    let backtrack_re = ExecBuilder::new(r"\b")
+        .bounded_backtracking()
+        .build()
+        .map(|exec| exec.into_regex())
+        .map_err(|err| format!("{}", err))
+        .unwrap();
+
+    let default_re = ExecBuilder::new(r"\b")
+        .build()
+        .map(|exec| exec.into_regex())
+        .map_err(|err| format!("{}", err))
+        .unwrap();
+
+    let input = "䅅\\u{a0}";
+
+    let fi1 = backtrack_re.find_iter(input);
+    let fi2 = default_re.find_iter(input);
+    for (m1, m2) in fi1.zip(fi2) {
+        assert_eq!(m1, m2);
+    }
+}
+*/
+
+mod consistent;
+
+mod crates_regex {
+
+    macro_rules! consistent {
+        ($test_name:ident, $regex_src:expr) => {
+            #[test]
+            fn $test_name() {
+                use super::consistent::backends_are_consistent;
+
+                if option_env!("RUST_REGEX_RANDOM_TEST").is_some() {
+                    match backends_are_consistent($regex_src) {
+                        Ok(_) => {}
+                        Err(err) => panic!("{}", err),
+                    }
+                }
+            }
+        };
+    }
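+
+    // Note: `option_env!` is evaluated at compile time, so these generated
+    // tests only perform their checks when RUST_REGEX_RANDOM_TEST is set
+    // while the test binary is being compiled (roughly, something like
+    // `RUST_REGEX_RANDOM_TEST=1 cargo test`).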
+
+    include!("crates_regex.rs");
+}
diff --git a/tests/test_default.rs b/tests/test_default.rs
new file mode 100644
index 0000000..c0979c1
--- /dev/null
+++ b/tests/test_default.rs
@@ -0,0 +1,114 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+// Due to macro scoping rules, this definition only applies to the modules
+// defined below. Effectively, it allows us to use the same tests for both
+// native and dynamic regexes.
+//
+// This is also used to test the various matching engines. This one exercises
+// the normal code path which automatically chooses the engine based on the
+// regex and the input. Other dynamic tests explicitly set the engine to use.
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::Regex;
+        Regex::new($re)
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::RegexSet;
+        RegexSet::new($re)
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod misc;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod shortest_match;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
+
+#[test]
+fn disallow_non_utf8() {
+    assert!(regex::Regex::new(r"(?-u)\xFF").is_err());
+    assert!(regex::Regex::new(r"(?-u).").is_err());
+    assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err());
+    assert!(regex::Regex::new(r"(?-u)☃").is_err());
+}
+
+#[test]
+fn disallow_octal() {
+    assert!(regex::Regex::new(r"\0").is_err());
+}
+
+#[test]
+fn allow_octal() {
+    assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok());
+}
+
+#[test]
+fn oibits() {
+    use regex::bytes;
+    use regex::{Regex, RegexBuilder};
+    use std::panic::UnwindSafe;
+
+    fn assert_send<T: Send>() {}
+    fn assert_sync<T: Sync>() {}
+    fn assert_unwind_safe<T: UnwindSafe>() {}
+
+    assert_send::<Regex>();
+    assert_sync::<Regex>();
+    assert_unwind_safe::<Regex>();
+    assert_send::<RegexBuilder>();
+    assert_sync::<RegexBuilder>();
+    assert_unwind_safe::<RegexBuilder>();
+
+    assert_send::<bytes::Regex>();
+    assert_sync::<bytes::Regex>();
+    assert_unwind_safe::<bytes::Regex>();
+    assert_send::<bytes::RegexBuilder>();
+    assert_sync::<bytes::RegexBuilder>();
+    assert_unwind_safe::<bytes::RegexBuilder>();
+}
+
+// See: https://github.com/rust-lang/regex/issues/568
+#[test]
+fn oibits_regression() {
+    use regex::Regex;
+    use std::panic;
+
+    let _ = panic::catch_unwind(|| Regex::new("a").unwrap());
+}
diff --git a/tests/test_default_bytes.rs b/tests/test_default_bytes.rs
new file mode 100644
index 0000000..e4a25dc
--- /dev/null
+++ b/tests/test_default_bytes.rs
@@ -0,0 +1,78 @@
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::bytes::Regex;
+        Regex::new($re)
+    }};
+}
+
+macro_rules! regex_set_new {
+    ($res:expr) => {{
+        use regex::bytes::RegexSet;
+        RegexSet::new($res)
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_bytes.rs");
+include!("macros.rs");
+
+// A silly wrapper to make it possible to write and match raw bytes.
+struct R<'a>(&'a [u8]);
+impl<'a> R<'a> {
+    fn as_bytes(&self) -> &'a [u8] {
+        self.0
+    }
+}
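+// The byte-oriented test macros presumably call `.as_bytes()` on the
+// haystack, which is why `R` exposes an `as_bytes` method and can be passed
+// directly to the `mat!` invocations below.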
+
+// See: https://github.com/rust-lang/regex/issues/321
+//
+// These tests are here because they do not have the same behavior in every
+// regex engine.
+mat!(invalid_utf8_nfa1, r".", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), Some((2, 3)));
+mat!(invalid_utf8_nfa2, r"${2}ä", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), None);
+mat!(
+    invalid_utf8_nfa3,
+    r".",
+    R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"),
+    Some((1, 3))
+);
+mat!(
+    invalid_utf8_nfa4,
+    r"${2}ä",
+    R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"),
+    None
+);
+
+mod api;
+mod bytes;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod set;
+mod shortest_match;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/test_nfa.rs b/tests/test_nfa.rs
new file mode 100644
index 0000000..05dad23
--- /dev/null
+++ b/tests/test_nfa.rs
@@ -0,0 +1,53 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).nfa().build().map(|e| e.into_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re).nfa().build().map(|e| e.into_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/test_nfa_bytes.rs b/tests/test_nfa_bytes.rs
new file mode 100644
index 0000000..1042318
--- /dev/null
+++ b/tests/test_nfa_bytes.rs
@@ -0,0 +1,58 @@
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .nfa()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .nfa()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_bytes.rs");
+include!("macros.rs");
+
+mod api;
+mod bytes;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/test_nfa_utf8bytes.rs b/tests/test_nfa_utf8bytes.rs
new file mode 100644
index 0000000..86487a1
--- /dev/null
+++ b/tests/test_nfa_utf8bytes.rs
@@ -0,0 +1,57 @@
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).nfa().bytes(true).build().map(|e| e.into_regex())
+    }};
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    };
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .nfa()
+            .bytes(true)
+            .build()
+            .map(|e| e.into_regex_set())
+    }};
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    };
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+#[cfg(feature = "unicode")]
+mod unicode;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary;
+#[cfg(feature = "unicode-perl")]
+mod word_boundary_unicode;
diff --git a/tests/unicode.rs b/tests/unicode.rs
new file mode 100644
index 0000000..52522f4
--- /dev/null
+++ b/tests/unicode.rs
@@ -0,0 +1,231 @@
+mat!(uni_literal, r"☃", "☃", Some((0, 3)));
+mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3)));
+mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3)));
+mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3)));
+mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3)));
+mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8)));
+mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2)));
+mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2)));
+mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5)));
+mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)));
+mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)));
+mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)));
+mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)));
+mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)));
+
+// Test the Unicode friendliness of Perl character classes.
+mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)));
+mat!(uni_perl_w_not, r"\w+", "⥡", None);
+mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3)));
+mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)));
+mat!(uni_perl_d_not, r"\d+", "Ⅱ", None);
+mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3)));
+mat!(uni_perl_s, r"\s+", " ", Some((0, 3)));
+mat!(uni_perl_s_not, r"\s+", "☃", None);
+mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)));
+
+// And do the same for word boundaries.
+mat!(uni_boundary_none, r"\d\b", "6δ", None);
+mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
+mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
+mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
+
+// Test general categories.
+//
+// We should test more, but there's a lot. Write a script to generate more of
+// these tests.
+mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "Ａ", Some((0, 3)));
+mat!(
+    uni_class_gencat_close_punctuation,
+    r"\p{Close_Punctuation}",
+    "❯",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_connector_punctuation,
+    r"\p{Connector_Punctuation}",
+    "⁀",
+    Some((0, 3))
+);
+mat!(uni_class_gencat_control, r"\p{Control}", "\u{9f}", Some((0, 2)));
+mat!(
+    uni_class_gencat_currency_symbol,
+    r"\p{Currency_Symbol}",
+    "£",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_dash_punctuation,
+    r"\p{Dash_Punctuation}",
+    "〰",
+    Some((0, 3))
+);
+mat!(uni_class_gencat_decimal_numer, r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
+mat!(
+    uni_class_gencat_enclosing_mark,
+    r"\p{Enclosing_Mark}",
+    "\u{A672}",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_final_punctuation,
+    r"\p{Final_Punctuation}",
+    "⸡",
+    Some((0, 3))
+);
+mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
+mat!(
+    uni_class_gencat_initial_punctuation,
+    r"\p{Initial_Punctuation}",
+    "⸜",
+    Some((0, 3))
+);
+mat!(uni_class_gencat_letter, r"\p{Letter}", "Έ", Some((0, 2)));
+mat!(uni_class_gencat_letter_number, r"\p{Letter_Number}", "ↂ", Some((0, 3)));
+mat!(
+    uni_class_gencat_line_separator,
+    r"\p{Line_Separator}",
+    "\u{2028}",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_lowercase_letter,
+    r"\p{Lowercase_Letter}",
+    "ϛ",
+    Some((0, 2))
+);
+mat!(uni_class_gencat_mark, r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
+mat!(uni_class_gencat_math, r"\p{Math}", "⋿", Some((0, 3)));
+mat!(
+    uni_class_gencat_modifier_letter,
+    r"\p{Modifier_Letter}",
+    "𖭃",
+    Some((0, 4))
+);
+mat!(
+    uni_class_gencat_modifier_symbol,
+    r"\p{Modifier_Symbol}",
+    "🏿",
+    Some((0, 4))
+);
+mat!(
+    uni_class_gencat_nonspacing_mark,
+    r"\p{Nonspacing_Mark}",
+    "\u{1E94A}",
+    Some((0, 4))
+);
+mat!(uni_class_gencat_number, r"\p{Number}", "⓿", Some((0, 3)));
+mat!(
+    uni_class_gencat_open_punctuation,
+    r"\p{Open_Punctuation}",
+    "⦅",
+    Some((0, 3))
+);
+mat!(uni_class_gencat_other, r"\p{Other}", "\u{bc9}", Some((0, 3)));
+mat!(uni_class_gencat_other_letter, r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
+mat!(uni_class_gencat_other_number, r"\p{Other_Number}", "㉏", Some((0, 3)));
+mat!(
+    uni_class_gencat_other_punctuation,
+    r"\p{Other_Punctuation}",
+    "𞥞",
+    Some((0, 4))
+);
+mat!(uni_class_gencat_other_symbol, r"\p{Other_Symbol}", "⅌", Some((0, 3)));
+mat!(
+    uni_class_gencat_paragraph_separator,
+    r"\p{Paragraph_Separator}",
+    "\u{2029}",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_private_use,
+    r"\p{Private_Use}",
+    "\u{10FFFD}",
+    Some((0, 4))
+);
+mat!(uni_class_gencat_punctuation, r"\p{Punctuation}", "𑁍", Some((0, 4)));
+mat!(uni_class_gencat_separator, r"\p{Separator}", "\u{3000}", Some((0, 3)));
+mat!(
+    uni_class_gencat_space_separator,
+    r"\p{Space_Separator}",
+    "\u{205F}",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_spacing_mark,
+    r"\p{Spacing_Mark}",
+    "\u{16F7E}",
+    Some((0, 4))
+);
+mat!(uni_class_gencat_symbol, r"\p{Symbol}", "⯈", Some((0, 3)));
+mat!(
+    uni_class_gencat_titlecase_letter,
+    r"\p{Titlecase_Letter}",
+    "ῼ",
+    Some((0, 3))
+);
+mat!(
+    uni_class_gencat_unassigned,
+    r"\p{Unassigned}",
+    "\u{10FFFF}",
+    Some((0, 4))
+);
+mat!(
+    uni_class_gencat_uppercase_letter,
+    r"\p{Uppercase_Letter}",
+    "Ꝋ",
+    Some((0, 3))
+);
+
+// Test a smattering of properties.
+mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3)));
+mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4)));
+mat!(
+    uni_class_prop_picto1,
+    r"\p{extendedpictographic}",
+    "\u{1FA6E}",
+    Some((0, 4))
+);
+mat!(
+    uni_class_prop_picto2,
+    r"\p{extendedpictographic}",
+    "\u{1FFFD}",
+    Some((0, 4))
+);
+
+// grapheme_cluster_break
+mat!(
+    uni_class_gcb_prepend,
+    r"\p{grapheme_cluster_break=prepend}",
+    "\u{11D46}",
+    Some((0, 4))
+);
+mat!(
+    uni_class_gcb_ri1,
+    r"\p{gcb=regional_indicator}",
+    "\u{1F1E6}",
+    Some((0, 4))
+);
+mat!(uni_class_gcb_ri2, r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4)));
+mat!(
+    uni_class_gcb_ri3,
+    r"\p{gcb=regionalindicator}",
+    "\u{1F1FF}",
+    Some((0, 4))
+);
+mat!(uni_class_gcb_lvt, r"\p{gcb=lvt}", "\u{C989}", Some((0, 3)));
+mat!(uni_class_gcb_zwj, r"\p{gcb=zwj}", "\u{200D}", Some((0, 3)));
+
+// word_break
+mat!(uni_class_wb1, r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3)));
+mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3)));
+mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3)));
+mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3)));
+mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4)));
+
+// sentence_break
+mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2)));
+mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2)));
+mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3)));
+mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4)));
+mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3)));
diff --git a/tests/word_boundary.rs b/tests/word_boundary.rs
new file mode 100644
index 0000000..7fe97a2
--- /dev/null
+++ b/tests/word_boundary.rs
@@ -0,0 +1,89 @@
+// Many of these are cribbed from RE2's test suite.
+
+matiter!(wb1, r"\b", "");
+matiter!(wb2, r"\b", "a", (0, 0), (1, 1));
+matiter!(wb3, r"\b", "ab", (0, 0), (2, 2));
+matiter!(wb4, r"^\b", "ab", (0, 0));
+matiter!(wb5, r"\b$", "ab", (2, 2));
+matiter!(wb6, r"^\b$", "ab");
+matiter!(wb7, r"\bbar\b", "nobar bar foo bar", (6, 9), (14, 17));
+matiter!(wb8, r"a\b", "faoa x", (3, 4));
+matiter!(wb9, r"\bbar", "bar x", (0, 3));
+matiter!(wb10, r"\bbar", "foo\nbar x", (4, 7));
+matiter!(wb11, r"bar\b", "foobar", (3, 6));
+matiter!(wb12, r"bar\b", "foobar\nxxx", (3, 6));
+matiter!(wb13, r"(foo|bar|[A-Z])\b", "foo", (0, 3));
+matiter!(wb14, r"(foo|bar|[A-Z])\b", "foo\n", (0, 3));
+matiter!(wb15, r"\b(foo|bar|[A-Z])", "foo", (0, 3));
+matiter!(wb16, r"\b(foo|bar|[A-Z])\b", "X", (0, 1));
+matiter!(wb17, r"\b(foo|bar|[A-Z])\b", "XY");
+matiter!(wb18, r"\b(foo|bar|[A-Z])\b", "bar", (0, 3));
+matiter!(wb19, r"\b(foo|bar|[A-Z])\b", "foo", (0, 3));
+matiter!(wb20, r"\b(foo|bar|[A-Z])\b", "foo\n", (0, 3));
+matiter!(wb21, r"\b(foo|bar|[A-Z])\b", "ffoo bbar N x", (10, 11));
+matiter!(wb22, r"\b(fo|foo)\b", "fo", (0, 2));
+matiter!(wb23, r"\b(fo|foo)\b", "foo", (0, 3));
+matiter!(wb24, r"\b\b", "");
+matiter!(wb25, r"\b\b", "a", (0, 0), (1, 1));
+matiter!(wb26, r"\b$", "");
+matiter!(wb27, r"\b$", "x", (1, 1));
+matiter!(wb28, r"\b$", "y x", (3, 3));
+matiter!(wb29, r"\b.$", "x", (0, 1));
+matiter!(wb30, r"^\b(fo|foo)\b", "fo", (0, 2));
+matiter!(wb31, r"^\b(fo|foo)\b", "foo", (0, 3));
+matiter!(wb32, r"^\b$", "");
+matiter!(wb33, r"^\b$", "x");
+matiter!(wb34, r"^\b.$", "x", (0, 1));
+matiter!(wb35, r"^\b.\b$", "x", (0, 1));
+matiter!(wb36, r"^^^^^\b$$$$$", "");
+matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1));
+matiter!(wb38, r"^^^^^\b$$$$$", "x");
+matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1));
+matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5));
+matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+
+matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10));
+matiter!(nb2, r"a\B", "faoa x", (1, 2));
+matiter!(nb3, r"\Bbar", "bar x");
+matiter!(nb4, r"\Bbar", "foo\nbar x");
+matiter!(nb5, r"bar\B", "foobar");
+matiter!(nb6, r"bar\B", "foobar\nxxx");
+matiter!(nb7, r"(foo|bar|[A-Z])\B", "foox", (0, 3));
+matiter!(nb8, r"(foo|bar|[A-Z])\B", "foo\n");
+matiter!(nb9, r"\B", "", (0, 0));
+matiter!(nb10, r"\B", "x");
+matiter!(nb11, r"\B(foo|bar|[A-Z])", "foo");
+matiter!(nb12, r"\B(foo|bar|[A-Z])\B", "xXy", (1, 2));
+matiter!(nb13, r"\B(foo|bar|[A-Z])\B", "XY");
+matiter!(nb14, r"\B(foo|bar|[A-Z])\B", "XYZ", (1, 2));
+matiter!(nb15, r"\B(foo|bar|[A-Z])\B", "abara", (1, 4));
+matiter!(nb16, r"\B(foo|bar|[A-Z])\B", "xfoo_", (1, 4));
+matiter!(nb17, r"\B(foo|bar|[A-Z])\B", "xfoo\n");
+matiter!(nb18, r"\B(foo|bar|[A-Z])\B", "foo bar vNX", (9, 10));
+matiter!(nb19, r"\B(fo|foo)\B", "xfoo", (1, 3));
+matiter!(nb20, r"\B(foo|fo)\B", "xfooo", (1, 4));
+matiter!(nb21, r"\B\B", "", (0, 0));
+matiter!(nb22, r"\B\B", "x");
+matiter!(nb23, r"\B$", "", (0, 0));
+matiter!(nb24, r"\B$", "x");
+matiter!(nb25, r"\B$", "y x");
+matiter!(nb26, r"\B.$", "x");
+matiter!(nb27, r"^\B(fo|foo)\B", "fo");
+matiter!(nb28, r"^\B(fo|foo)\B", "foo");
+matiter!(nb29, r"^\B", "", (0, 0));
+matiter!(nb30, r"^\B", "x");
+matiter!(nb31, r"^\B\B", "", (0, 0));
+matiter!(nb32, r"^\B\B", "x");
+matiter!(nb33, r"^\B$", "", (0, 0));
+matiter!(nb34, r"^\B$", "x");
+matiter!(nb35, r"^\B.$", "x");
+matiter!(nb36, r"^\B.\B$", "x");
+matiter!(nb37, r"^^^^^\B$$$$$", "", (0, 0));
+matiter!(nb38, r"^^^^^\B.$$$$$", "x");
+matiter!(nb39, r"^^^^^\B$$$$$", "x");
+
+// These work for both Unicode and ASCII because all matches are reported as
+// byte offsets, and « and » do not correspond to word boundaries at either
+// the character or byte level.
+matiter!(unicode1, r"\bx\b", "«x", (2, 3));
+matiter!(unicode2, r"\bx\b", "x»", (0, 1));
diff --git a/tests/word_boundary_ascii.rs b/tests/word_boundary_ascii.rs
new file mode 100644
index 0000000..5a3cf11
--- /dev/null
+++ b/tests/word_boundary_ascii.rs
@@ -0,0 +1,9 @@
+// ASCII word boundaries are completely oblivious to Unicode characters.
+// For Unicode word boundaries, the tests are precisely inverted.
+matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3));
+matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ");
+matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5));
+
+// We still get Unicode word boundaries by default in byte regexes.
+matiter!(unicode1, r"\bx\b", "áxβ");
+matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3));
diff --git a/tests/word_boundary_unicode.rs b/tests/word_boundary_unicode.rs
new file mode 100644
index 0000000..c41355f
--- /dev/null
+++ b/tests/word_boundary_unicode.rs
@@ -0,0 +1,6 @@
+// Unicode word boundaries know about Unicode characters.
+// For ASCII word boundaries, the tests are precisely inverted.
+matiter!(unicode1, r"\bx\b", "áxβ");
+matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3));
+
+matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3));