Import 'regex-syntax' package version 0.6.17

* Add OWNERS
Bug: 152884384
Test: make

Change-Id: I760a69ffc851039ceaff58980deece308a426aed
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..41dbfee
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "7174f158bdb01a8e60eef28c3676429eae024882"
+  }
+}
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..a89e1f5
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,19 @@
+// This file is generated by cargo2android.py.
+
+rust_library_host_rlib {
+    name: "libregex_syntax",
+    crate_name: "regex_syntax",
+    srcs: ["src/lib.rs"],
+    edition: "2015",
+    features: [
+        "default",
+        "unicode",
+        "unicode-age",
+        "unicode-bool",
+        "unicode-case",
+        "unicode-gencat",
+        "unicode-perl",
+        "unicode-script",
+        "unicode-segment",
+    ],
+}
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..46fafd0
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,32 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "regex-syntax"
+version = "0.6.17"
+authors = ["The Rust Project Developers"]
+description = "A regular expression parser."
+homepage = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex-syntax"
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rust-lang/regex"
+
+[features]
+default = ["unicode"]
+unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"]
+unicode-age = []
+unicode-bool = []
+unicode-case = []
+unicode-gencat = []
+unicode-perl = []
+unicode-script = []
+unicode-segment = []
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..c21b35d
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,32 @@
+[package]
+name = "regex-syntax"
+version = "0.6.17"  #:version
+authors = ["The Rust Project Developers"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex-syntax"
+homepage = "https://github.com/rust-lang/regex"
+description = "A regular expression parser."
+workspace = ".."
+
+# Features are documented in the "Crate features" section of the crate docs:
+# https://docs.rs/regex-syntax/*/#crate-features
+[features]
+default = ["unicode"]
+
+unicode = [
+  "unicode-age",
+  "unicode-bool",
+  "unicode-case",
+  "unicode-gencat",
+  "unicode-perl",
+  "unicode-script",
+  "unicode-segment",
+]
+unicode-age = []
+unicode-bool = []
+unicode-case = []
+unicode-gencat = []
+unicode-perl = []
+unicode-script = []
+unicode-segment = []
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE
\ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..39d4bdb
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..e071c39
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "regex-syntax"
+description: "A regular expression parser."
+third_party {
+  url {
+    type: HOMEPAGE
+    value: "https://crates.io/crates/regex-syntax"
+  }
+  url {
+    type: GIT
+    value: "https://github.com/rust-lang/regex"
+  }
+  version: "0.6.17"
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2020
+    month: 3
+    day: 31
+  }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..46fc303
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e904601
--- /dev/null
+++ b/README.md
@@ -0,0 +1,99 @@
+regex-syntax
+============
+This crate provides a robust regular expression parser.
+
+[![Build status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex)
+[![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex)
+[![](https://meritbadge.herokuapp.com/regex-syntax)](https://crates.io/crates/regex-syntax)
+[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
+
+
+### Documentation
+
+https://docs.rs/regex-syntax
+
+
+### Overview
+
+There are two primary types exported by this crate: `Ast` and `Hir`. The former
+is a faithful abstract syntax of a regular expression, and can convert regular
+expressions back to their concrete syntax while mostly preserving its original
+form. The latter type is a high level intermediate representation of a regular
+expression that is amenable to analysis and compilation into byte codes or
+automata. An `Hir` achieves this by drastically simplifying the syntactic
+structure of the regular expression. While an `Hir` can be converted back to
+its equivalent concrete syntax, the result is unlikely to resemble the original
+concrete syntax that produced the `Hir`.
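+
+As a minimal sketch (using only this crate's own API), the `Ast` round trip
+described above can be seen by parsing a pattern into an `Ast` and printing it
+back out via its `Display` implementation:
+
+```rust
+use regex_syntax::ast::parse::Parser;
+
+// Parse the concrete syntax into an `Ast`.
+let ast = Parser::new().parse("a|b").unwrap();
+// `Ast` implements `Display`, which writes the expression back out in
+// concrete syntax (original whitespace formatting is not preserved).
+println!("{}", ast);
+```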
+
+
+### Example
+
+This example shows how to parse a pattern string into its HIR:
+
+```rust
+use regex_syntax::Parser;
+use regex_syntax::hir::{self, Hir};
+
+let hir = Parser::new().parse("a|b").unwrap();
+assert_eq!(hir, Hir::alternation(vec![
+    Hir::literal(hir::Literal::Unicode('a')),
+    Hir::literal(hir::Literal::Unicode('b')),
+]));
+```
+
+
+### Safety
+
+This crate has no `unsafe` code and sets `forbid(unsafe_code)`. While it's
+possible this crate could use `unsafe` code in the future, the standard
+for doing so is extremely high. In general, most code in this crate is not
+performance critical, since it tends to be dwarfed by the time it takes to
+compile a regular expression into an automaton. Therefore, there is little need
+for extreme optimization, and therefore, use of `unsafe`.
+
+The standard for using `unsafe` in this crate is extremely high because this
+crate is intended to be reasonably safe to use with user supplied regular
+expressions. Therefore, while there may be bugs in the regex parser itself,
+they should _never_ result in memory unsafety unless there is either a bug
+in the compiler or the standard library. (Since `regex-syntax` has zero
+dependencies.)
+
+
+### Crate features
+
+By default, this crate bundles a fairly large amount of Unicode data tables
+(a source size of ~750KB). Because of their large size, one can disable some
+or all of these data tables. If a regular expression attempts to use Unicode
+data that is not available, then an error will occur when translating the `Ast`
+to the `Hir`.
+
+The full set of features one can disable is
+[in the "Crate features" section of the documentation](https://docs.rs/regex-syntax/*/#crate-features).
+
+
+### Testing
+
+Simply running `cargo test` will give you very good coverage. However, because
+of the large number of features exposed by this crate, a `test` script is
+included in this directory which will test several feature combinations. This
+is the same script that is run in CI.
+
+
+### Motivation
+
+The primary purpose of this crate is to provide the parser used by `regex`.
+Specifically, this crate is treated as an implementation detail of the `regex` crate,
+and is primarily developed for the needs of `regex`.
+
+Since this crate is an implementation detail of `regex`, it may experience
+breaking change releases at a different cadence from `regex`. This is only
+possible because this crate is _not_ a public dependency of `regex`.
+
+Another consequence of this de-coupling is that there is no direct way to
+compile a `regex::Regex` from a `regex_syntax::hir::Hir`. Instead, one must
+first convert the `Hir` to a string (via its `std::fmt::Display`) and then
+compile that via `Regex::new`. While this does repeat some work, compilation
+typically takes much longer than parsing.
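+
+A minimal sketch of that workflow, assuming the `regex` crate is available as a
+separate dependency (it is not a dependency of `regex-syntax` itself):
+
+```rust
+use regex::Regex; // assumed separate dependency, not part of this crate
+use regex_syntax::Parser;
+
+let hir = Parser::new().parse(r"\w+").unwrap();
+// `Hir` implements `Display`, so its concrete syntax can be recovered as a
+// string and handed to `Regex::new` for compilation.
+let re = Regex::new(&hir.to_string()).unwrap();
+assert!(re.is_match("abc"));
+```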
+
+Stated differently, the coupling between `regex` and `regex-syntax` exists only
+at the level of the concrete syntax.
diff --git a/benches/bench.rs b/benches/bench.rs
new file mode 100644
index 0000000..ba7f81c
--- /dev/null
+++ b/benches/bench.rs
@@ -0,0 +1,64 @@
+#![feature(test)]
+
+extern crate regex_syntax;
+extern crate test;
+
+use regex_syntax::Parser;
+use test::Bencher;
+
+#[bench]
+fn parse_simple1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"^bc(d|e)*$";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_simple2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"'[a-zA-Z_][a-zA-Z0-9_]*(')\b";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_small1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}|\p{N}|\s|.|\d";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\pL\p{Greek}\p{Hiragana}\p{Alphabetic}\p{Hebrew}\p{Arabic}";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\s\S\w\W\d\D";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium3(b: &mut Bencher) {
+    b.iter(|| {
+        let re =
+            r"\p{age:3.2}\p{hira}\p{scx:hira}\p{alphabetic}\p{sc:Greek}\pL";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_huge(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}{100}";
+        Parser::new().parse(re).unwrap()
+    });
+}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
new file mode 100644
index 0000000..7179f2d
--- /dev/null
+++ b/src/ast/mod.rs
@@ -0,0 +1,1502 @@
+/*!
+Defines an abstract syntax for regular expressions.
+*/
+
+use std::cmp::Ordering;
+use std::error;
+use std::fmt;
+
+pub use ast::visitor::{visit, Visitor};
+
+pub mod parse;
+pub mod print;
+mod visitor;
+
+/// An error that occurred while parsing a regular expression into an abstract
+/// syntax tree.
+///
+/// Note that not all ASTs represent a valid regular expression. For example,
+/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
+/// valid Unicode property name. That particular error is reported when
+/// translating an AST to the high-level intermediate representation (`HIR`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error.
+    kind: ErrorKind,
+    /// The original pattern that the parser generated the error from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error.
+    span: Span,
+}
+
+impl Error {
+    /// Return the type of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+
+    /// Return an auxiliary span. This span exists only for some errors that
+    /// benefit from being able to point to two locations in the original
+    /// regular expression. For example, "duplicate" errors will have the
+    /// main error position set to the duplicate occurrence while its
+    /// auxiliary span will be set to the initial occurrence.
+    pub fn auxiliary_span(&self) -> Option<&Span> {
+        use self::ErrorKind::*;
+        match self.kind {
+            FlagDuplicate { ref original } => Some(original),
+            FlagRepeatedNegation { ref original, .. } => Some(original),
+            GroupNameDuplicate { ref original, .. } => Some(original),
+            _ => None,
+        }
+    }
+}
+
+/// The type of an error that occurred while building an AST.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// The capturing group limit was exceeded.
+    ///
+    /// Note that this represents a limit on the total number of capturing
+    /// groups in a regex and not necessarily the number of nested capturing
+    /// groups. That is, the nest limit can be low and it is still possible for
+    /// this error to occur.
+    CaptureLimitExceeded,
+    /// An invalid escape sequence was found in a character class set.
+    ClassEscapeInvalid,
+    /// An invalid character class range was found. An invalid range is any
+    /// range where the start is greater than the end.
+    ClassRangeInvalid,
+    /// An invalid range boundary was found in a character class. Range
+    /// boundaries must be a single literal codepoint, but this error indicates
+    /// that something else was found, such as a nested class.
+    ClassRangeLiteral,
+    /// An opening `[` was found with no corresponding closing `]`.
+    ClassUnclosed,
+    /// Note that this error variant is no longer used. Namely, a decimal
+    /// number can only appear as a repetition quantifier. When the number
+    /// in a repetition quantifier is empty, then it gets its own specialized
+    /// error, `RepetitionCountDecimalEmpty`.
+    DecimalEmpty,
+    /// An invalid decimal number was given where one was expected.
+    DecimalInvalid,
+    /// A bracketed hex literal was empty.
+    EscapeHexEmpty,
+    /// A bracketed hex literal did not correspond to a Unicode scalar value.
+    EscapeHexInvalid,
+    /// An invalid hexadecimal digit was found.
+    EscapeHexInvalidDigit,
+    /// EOF was found before an escape sequence was completed.
+    EscapeUnexpectedEof,
+    /// An unrecognized escape sequence.
+    EscapeUnrecognized,
+    /// A dangling negation was used when setting flags, e.g., `i-`.
+    FlagDanglingNegation,
+    /// A flag was used twice, e.g., `i-i`.
+    FlagDuplicate {
+        /// The position of the original flag. The error position
+        /// points to the duplicate flag.
+        original: Span,
+    },
+    /// The negation operator was used twice, e.g., `-i-s`.
+    FlagRepeatedNegation {
+        /// The position of the original negation operator. The error position
+        /// points to the duplicate negation operator.
+        original: Span,
+    },
+    /// Expected a flag but got EOF, e.g., `(?`.
+    FlagUnexpectedEof,
+    /// Unrecognized flag, e.g., `a`.
+    FlagUnrecognized,
+    /// A duplicate capture name was found.
+    GroupNameDuplicate {
+        /// The position of the initial occurrence of the capture name. The
+        /// error position itself points to the duplicate occurrence.
+        original: Span,
+    },
+    /// A capture group name is empty, e.g., `(?P<>abc)`.
+    GroupNameEmpty,
+    /// An invalid character was seen for a capture group name. This includes
+    /// errors where the first character is a digit (even though subsequent
+    /// characters are allowed to be digits).
+    GroupNameInvalid,
+    /// A closing `>` could not be found for a capture group name.
+    GroupNameUnexpectedEof,
+    /// An unclosed group, e.g., `(ab`.
+    ///
+    /// The span of this error corresponds to the unclosed parenthesis.
+    GroupUnclosed,
+    /// An unopened group, e.g., `ab)`.
+    GroupUnopened,
+    /// The nest limit was exceeded. The limit stored here is the limit
+    /// configured in the parser.
+    NestLimitExceeded(u32),
+    /// The range provided in a counted repetition operator is invalid. The
+    /// range is invalid if the start is greater than the end.
+    RepetitionCountInvalid,
+    /// An opening `{` was not followed by a valid decimal value.
+    /// For example, `x{}` or `x{]}` would fail.
+    RepetitionCountDecimalEmpty,
+    /// An opening `{` was found with no corresponding closing `}`.
+    RepetitionCountUnclosed,
+    /// A repetition operator was applied to a missing sub-expression. This
+    /// occurs, for example, in the regex consisting of just a `*` or even
+    /// `(?i)*`. It is, however, possible to create a repetition operating on
+    /// an empty sub-expression. For example, `()*` is still considered valid.
+    RepetitionMissing,
+    /// The Unicode class is not valid. This typically occurs when a `\p` is
+    /// followed by something other than a `{`.
+    UnicodeClassInvalid,
+    /// When octal support is disabled, this error is produced when an octal
+    /// escape is used. The octal escape is assumed to be an invocation of
+    /// a backreference, which is the common case.
+    UnsupportedBackreference,
+    /// When syntax similar to PCRE's look-around is used, this error is
+    /// returned. Some example syntaxes that are rejected include, but are
+    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
+    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
+    /// error is used to improve the user experience.
+    UnsupportedLookAround,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match self.kind {
+            CaptureLimitExceeded => "capture group limit exceeded",
+            ClassEscapeInvalid => "invalid escape sequence in character class",
+            ClassRangeInvalid => "invalid character class range",
+            ClassRangeLiteral => "invalid range boundary, must be a literal",
+            ClassUnclosed => "unclosed character class",
+            DecimalEmpty => "empty decimal literal",
+            DecimalInvalid => "invalid decimal literal",
+            EscapeHexEmpty => "empty hexadecimal literal",
+            EscapeHexInvalid => "invalid hexadecimal literal",
+            EscapeHexInvalidDigit => "invalid hexadecimal digit",
+            EscapeUnexpectedEof => "unexpected eof (escape sequence)",
+            EscapeUnrecognized => "unrecognized escape sequence",
+            FlagDanglingNegation => "dangling flag negation operator",
+            FlagDuplicate { .. } => "duplicate flag",
+            FlagRepeatedNegation { .. } => "repeated negation",
+            FlagUnexpectedEof => "unexpected eof (flag)",
+            FlagUnrecognized => "unrecognized flag",
+            GroupNameDuplicate { .. } => "duplicate capture group name",
+            GroupNameEmpty => "empty capture group name",
+            GroupNameInvalid => "invalid capture group name",
+            GroupNameUnexpectedEof => "unclosed capture group name",
+            GroupUnclosed => "unclosed group",
+            GroupUnopened => "unopened group",
+            NestLimitExceeded(_) => "nest limit exceeded",
+            RepetitionCountInvalid => "invalid repetition count range",
+            RepetitionCountUnclosed => "unclosed counted repetition",
+            RepetitionMissing => "repetition operator missing expression",
+            UnicodeClassInvalid => "invalid Unicode character class",
+            UnsupportedBackreference => "backreferences are not supported",
+            UnsupportedLookAround => "look-around is not supported",
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::ErrorKind::*;
+        match *self {
+            CaptureLimitExceeded => write!(
+                f,
+                "exceeded the maximum number of \
+                 capturing groups ({})",
+                ::std::u32::MAX
+            ),
+            ClassEscapeInvalid => {
+                write!(f, "invalid escape sequence found in character class")
+            }
+            ClassRangeInvalid => write!(
+                f,
+                "invalid character class range, \
+                 the start must be <= the end"
+            ),
+            ClassRangeLiteral => {
+                write!(f, "invalid range boundary, must be a literal")
+            }
+            ClassUnclosed => write!(f, "unclosed character class"),
+            DecimalEmpty => write!(f, "decimal literal empty"),
+            DecimalInvalid => write!(f, "decimal literal invalid"),
+            EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
+            EscapeHexInvalid => {
+                write!(f, "hexadecimal literal is not a Unicode scalar value")
+            }
+            EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
+            EscapeUnexpectedEof => write!(
+                f,
+                "incomplete escape sequence, \
+                 reached end of pattern prematurely"
+            ),
+            EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
+            FlagDanglingNegation => {
+                write!(f, "dangling flag negation operator")
+            }
+            FlagDuplicate { .. } => write!(f, "duplicate flag"),
+            FlagRepeatedNegation { .. } => {
+                write!(f, "flag negation operator repeated")
+            }
+            FlagUnexpectedEof => {
+                write!(f, "expected flag but got end of regex")
+            }
+            FlagUnrecognized => write!(f, "unrecognized flag"),
+            GroupNameDuplicate { .. } => {
+                write!(f, "duplicate capture group name")
+            }
+            GroupNameEmpty => write!(f, "empty capture group name"),
+            GroupNameInvalid => write!(f, "invalid capture group character"),
+            GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
+            GroupUnclosed => write!(f, "unclosed group"),
+            GroupUnopened => write!(f, "unopened group"),
+            NestLimitExceeded(limit) => write!(
+                f,
+                "exceed the maximum number of \
+                 nested parentheses/brackets ({})",
+                limit
+            ),
+            RepetitionCountInvalid => write!(
+                f,
+                "invalid repetition count range, \
+                 the start must be <= the end"
+            ),
+            RepetitionCountDecimalEmpty => {
+                write!(f, "repetition quantifier expects a valid decimal")
+            }
+            RepetitionCountUnclosed => {
+                write!(f, "unclosed counted repetition")
+            }
+            RepetitionMissing => {
+                write!(f, "repetition operator missing expression")
+            }
+            UnicodeClassInvalid => {
+                write!(f, "invalid Unicode character class")
+            }
+            UnsupportedBackreference => {
+                write!(f, "backreferences are not supported")
+            }
+            UnsupportedLookAround => write!(
+                f,
+                "look-around, including look-ahead and look-behind, \
+                 is not supported"
+            ),
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// Span represents the position information of a single AST item.
+///
+/// All span positions are absolute byte offsets that can be used on the
+/// original regular expression that was parsed.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Span {
+    /// The start byte offset.
+    pub start: Position,
+    /// The end byte offset.
+    pub end: Position,
+}
+
+impl fmt::Debug for Span {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Span({:?}, {:?})", self.start, self.end)
+    }
+}
+
+impl Ord for Span {
+    fn cmp(&self, other: &Span) -> Ordering {
+        (&self.start, &self.end).cmp(&(&other.start, &other.end))
+    }
+}
+
+impl PartialOrd for Span {
+    fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// A single position in a regular expression.
+///
+/// A position encodes one half of a span, and includes the byte offset, line
+/// number and column number.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Position {
+    /// The absolute offset of this position, starting at `0` from the
+    /// beginning of the regular expression pattern string.
+    pub offset: usize,
+    /// The line number, starting at `1`.
+    pub line: usize,
+    /// The approximate column number, starting at `1`.
+    pub column: usize,
+}
+
+impl fmt::Debug for Position {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Position(o: {:?}, l: {:?}, c: {:?})",
+            self.offset, self.line, self.column
+        )
+    }
+}
+
+impl Ord for Position {
+    fn cmp(&self, other: &Position) -> Ordering {
+        self.offset.cmp(&other.offset)
+    }
+}
+
+impl PartialOrd for Position {
+    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Span {
+    /// Create a new span with the given positions.
+    pub fn new(start: Position, end: Position) -> Span {
+        Span { start: start, end: end }
+    }
+
+    /// Create a new span using the given position as the start and end.
+    pub fn splat(pos: Position) -> Span {
+        Span::new(pos, pos)
+    }
+
+    /// Create a new span by replacing the starting position with the one
+    /// given.
+    pub fn with_start(self, pos: Position) -> Span {
+        Span { start: pos, ..self }
+    }
+
+    /// Create a new span by replacing the ending position with the one
+    /// given.
+    pub fn with_end(self, pos: Position) -> Span {
+        Span { end: pos, ..self }
+    }
+
+    /// Returns true if and only if this span occurs on a single line.
+    pub fn is_one_line(&self) -> bool {
+        self.start.line == self.end.line
+    }
+
+    /// Returns true if and only if this span is empty. That is, it points to
+    /// a single position in the concrete syntax of a regular expression.
+    pub fn is_empty(&self) -> bool {
+        self.start.offset == self.end.offset
+    }
+}
+
+impl Position {
+    /// Create a new position with the given information.
+    ///
+    /// `offset` is the absolute offset of the position, starting at `0` from
+    /// the beginning of the regular expression pattern string.
+    ///
+    /// `line` is the line number, starting at `1`.
+    ///
+    /// `column` is the approximate column number, starting at `1`.
+    pub fn new(offset: usize, line: usize, column: usize) -> Position {
+        Position { offset: offset, line: line, column: column }
+    }
+}
+
+/// An abstract syntax tree for a single expression along with comments
+/// found.
+///
+/// Comments are not stored in the tree itself to avoid complexity. Each
+/// comment contains a span of precisely where it occurred in the original
+/// regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WithComments {
+    /// The actual ast.
+    pub ast: Ast,
+    /// All comments found in the original regular expression.
+    pub comments: Vec<Comment>,
+}
+
+/// A comment from a regular expression with an associated span.
+///
+/// A regular expression can only contain comments when the `x` flag is
+/// enabled.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Comment {
+    /// The span of this comment, including the beginning `#` and ending `\n`.
+    pub span: Span,
+    /// The comment text, starting with the first character following the `#`
+    /// and ending with the last character preceding the `\n`.
+    pub comment: String,
+}
+
+/// An abstract syntax tree for a single regular expression.
+///
+/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
+/// space proportional to the size of the `Ast`.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the `Ast`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Ast {
+    /// An empty regex that matches everything.
+    Empty(Span),
+    /// A set of flags, e.g., `(?is)`.
+    Flags(SetFlags),
+    /// A single character literal, which includes escape sequences.
+    Literal(Literal),
+    /// The "any character" class.
+    Dot(Span),
+    /// A single zero-width assertion.
+    Assertion(Assertion),
+    /// A single character class. This includes all forms of character classes
+    /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
+    Class(Class),
+    /// A repetition operator applied to an arbitrary regular expression.
+    Repetition(Repetition),
+    /// A grouped regular expression.
+    Group(Group),
+    /// An alternation of regular expressions.
+    Alternation(Alternation),
+    /// A concatenation of regular expressions.
+    Concat(Concat),
+}
+
+impl Ast {
+    /// Return the span of this abstract syntax tree.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Ast::Empty(ref span) => span,
+            Ast::Flags(ref x) => &x.span,
+            Ast::Literal(ref x) => &x.span,
+            Ast::Dot(ref span) => span,
+            Ast::Assertion(ref x) => &x.span,
+            Ast::Class(ref x) => x.span(),
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this Ast is empty.
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            Ast::Empty(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if this AST has any (including possibly empty)
+    /// subexpressions.
+    fn has_subexprs(&self) -> bool {
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_) => false,
+            Ast::Class(_)
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => true,
+        }
+    }
+}
+
+/// Print a display representation of this Ast.
+///
+/// This does not preserve any of the original whitespace formatting that may
+/// have originally been present in the concrete syntax from which this Ast
+/// was generated.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Ast`.
+impl fmt::Display for Ast {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use ast::print::Printer;
+        Printer::new().print(self, f)
+    }
+}
+
+/// An alternation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Alternation {
+    /// The span of this alternation.
+    pub span: Span,
+    /// The alternate regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Alternation {
+    /// Return this alternation as an AST.
+    ///
+    /// If this alternation contains zero ASTs, then Ast::Empty is
+    /// returned. If this alternation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Alternation(self),
+        }
+    }
+}
+
+/// A concatenation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Concat {
+    /// The span of this concatenation.
+    pub span: Span,
+    /// The concatenation regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Concat {
+    /// Return this concatenation as an AST.
+    ///
+    /// If this concatenation contains zero ASTs, then Ast::Empty is
+    /// returned. If this concatenation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Concat(self),
+        }
+    }
+}
+
+/// A single literal expression.
+///
+/// A literal corresponds to a single Unicode scalar value. Literals may be
+/// represented in their literal form, e.g., `a` or in their escaped form,
+/// e.g., `\x61`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Literal {
+    /// The span of this literal.
+    pub span: Span,
+    /// The kind of this literal.
+    pub kind: LiteralKind,
+    /// The Unicode scalar value corresponding to this literal.
+    pub c: char,
+}
+
+impl Literal {
+    /// If this literal was written as a `\x` hex escape, then this returns
+    /// the corresponding byte value. Otherwise, this returns `None`.
+    pub fn byte(&self) -> Option<u8> {
+        let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
+        if self.c as u32 <= 255 && self.kind == short_hex {
+            Some(self.c as u8)
+        } else {
+            None
+        }
+    }
+}
+
+/// The kind of a single literal expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum LiteralKind {
+    /// The literal is written verbatim, e.g., `a` or `☃`.
+    Verbatim,
+    /// The literal is written as an escape because it is punctuation, e.g.,
+    /// `\*` or `\[`.
+    Punctuation,
+    /// The literal is written as an octal escape, e.g., `\141`.
+    Octal,
+    /// The literal is written as a hex code with a fixed number of digits
+    /// depending on the type of the escape, e.g., `\x61` or `\u0061` or
+    /// `\U00000061`.
+    HexFixed(HexLiteralKind),
+    /// The literal is written as a hex code with a bracketed number of
+    /// digits. The only restriction is that the bracketed hex code must refer
+    /// to a valid Unicode scalar value.
+    HexBrace(HexLiteralKind),
+    /// The literal is written as a specially recognized escape, e.g., `\f`
+    /// or `\n`.
+    Special(SpecialLiteralKind),
+}
+
+/// The type of a special literal.
+///
+/// A special literal is a special escape sequence recognized by the regex
+/// parser, e.g., `\f` or `\n`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum SpecialLiteralKind {
+    /// Bell, spelled `\a` (`\x07`).
+    Bell,
+    /// Form feed, spelled `\f` (`\x0C`).
+    FormFeed,
+    /// Tab, spelled `\t` (`\x09`).
+    Tab,
+    /// Line feed, spelled `\n` (`\x0A`).
+    LineFeed,
+    /// Carriage return, spelled `\r` (`\x0D`).
+    CarriageReturn,
+    /// Vertical tab, spelled `\v` (`\x0B`).
+    VerticalTab,
+    /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
+    /// parsing in verbose mode.
+    Space,
+}
+
+/// The type of a Unicode hex literal.
+///
+/// Note that all variants behave the same when used with brackets. They only
+/// differ when used without brackets in the number of hex digits that must
+/// follow.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HexLiteralKind {
+    /// A `\x` prefix. When used without brackets, this form is limited to
+    /// two digits.
+    X,
+    /// A `\u` prefix. When used without brackets, this form is limited to
+    /// four digits.
+    UnicodeShort,
+    /// A `\U` prefix. When used without brackets, this form is limited to
+    /// eight digits.
+    UnicodeLong,
+}
+
+impl HexLiteralKind {
+    /// The number of digits that must be used with this literal form when
+    /// used without brackets. When used with brackets, there is no
+    /// restriction on the number of digits.
+    pub fn digits(&self) -> u32 {
+        match *self {
+            HexLiteralKind::X => 2,
+            HexLiteralKind::UnicodeShort => 4,
+            HexLiteralKind::UnicodeLong => 8,
+        }
+    }
+}
+
+/// A single character class expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(ClassBracketed),
+}
+
+impl Class {
+    /// Return the span of this character class.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Class::Perl(ref x) => &x.span,
+            Class::Unicode(ref x) => &x.span,
+            Class::Bracketed(ref x) => &x.span,
+        }
+    }
+}
+
+/// A Perl character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassPerl {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of Perl class.
+    pub kind: ClassPerlKind,
+    /// Whether the class is negated or not. e.g., `\d` is not negated but
+    /// `\D` is.
+    pub negated: bool,
+}
+
+/// The available Perl character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassPerlKind {
+    /// Decimal numbers.
+    Digit,
+    /// Whitespace.
+    Space,
+    /// Word characters.
+    Word,
+}
+
+/// An ASCII character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassAscii {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of ASCII class.
+    pub kind: ClassAsciiKind,
+    /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
+    /// but `[[:^alpha:]]` is.
+    pub negated: bool,
+}
+
+/// The available ASCII character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassAsciiKind {
+    /// `[0-9A-Za-z]`
+    Alnum,
+    /// `[A-Za-z]`
+    Alpha,
+    /// `[\x00-\x7F]`
+    Ascii,
+    /// `[ \t]`
+    Blank,
+    /// `[\x00-\x1F\x7F]`
+    Cntrl,
+    /// `[0-9]`
+    Digit,
+    /// `[!-~]`
+    Graph,
+    /// `[a-z]`
+    Lower,
+    /// `[ -~]`
+    Print,
+    /// `[!-/:-@\[-`{-~]`
+    Punct,
+    /// `[\t\n\v\f\r ]`
+    Space,
+    /// `[A-Z]`
+    Upper,
+    /// `[0-9A-Za-z_]`
+    Word,
+    /// `[0-9A-Fa-f]`
+    Xdigit,
+}
+
+impl ClassAsciiKind {
+    /// Return the corresponding ClassAsciiKind variant for the given name.
+    ///
+    /// The name given should correspond to the lowercase version of the
+    /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
+    ///
+    /// If no variant with the corresponding name exists, then `None` is
+    /// returned.
+    pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
+        use self::ClassAsciiKind::*;
+        match name {
+            "alnum" => Some(Alnum),
+            "alpha" => Some(Alpha),
+            "ascii" => Some(Ascii),
+            "blank" => Some(Blank),
+            "cntrl" => Some(Cntrl),
+            "digit" => Some(Digit),
+            "graph" => Some(Graph),
+            "lower" => Some(Lower),
+            "print" => Some(Print),
+            "punct" => Some(Punct),
+            "space" => Some(Space),
+            "upper" => Some(Upper),
+            "word" => Some(Word),
+            "xdigit" => Some(Xdigit),
+            _ => None,
+        }
+    }
+}
+
+/// A Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not.
+    ///
+    /// Note: be careful when using this attribute. This specifically refers
+    /// to whether the class is written as `\p` or `\P`, where the latter
+    /// is `negated = true`. However, it is also possible to write something like
+    /// `\P{scx!=Katakana}` which is actually equivalent to
+    /// `\p{scx=Katakana}` and is therefore not actually negated even though
+    /// `negated = true` here. To test whether this class is truly negated
+    /// or not, use the `is_negated` method.
+    pub negated: bool,
+    /// The kind of Unicode class.
+    pub kind: ClassUnicodeKind,
+}
+
+impl ClassUnicode {
+    /// Returns true if this class has been negated.
+    ///
+    /// Note that this takes the Unicode op into account, if it's present.
+    /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
+    pub fn is_negated(&self) -> bool {
+        match self.kind {
+            ClassUnicodeKind::NamedValue {
+                op: ClassUnicodeOpKind::NotEqual,
+                ..
+            } => !self.negated,
+            _ => self.negated,
+        }
+    }
+}
+
+/// The available forms of Unicode character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeKind {
+    /// A one letter abbreviated class, e.g., `\pN`.
+    OneLetter(char),
+    /// A binary property, general category or script. The string may be
+    /// empty.
+    Named(String),
+    /// A property name and an associated value.
+    NamedValue {
+        /// The type of Unicode op used to associate `name` with `value`.
+        op: ClassUnicodeOpKind,
+        /// The property name (which may be empty).
+        name: String,
+        /// The property value (which may be empty).
+        value: String,
+    },
+}
+
+/// The type of op used in a Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeOpKind {
+    /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
+    Equal,
+    /// A property set to a specific value using a colon, e.g.,
+    /// `\p{scx:Katakana}`.
+    Colon,
+    /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
+    NotEqual,
+}
+
+impl ClassUnicodeOpKind {
+    /// Whether the op is an equality op or not.
+    pub fn is_equal(&self) -> bool {
+        match *self {
+            ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true,
+            _ => false,
+        }
+    }
+}
+
+/// A bracketed character class, e.g., `[a-z0-9]`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBracketed {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not. e.g., `[a]` is not negated but
+    /// `[^a]` is.
+    pub negated: bool,
+    /// The type of this set. A set is either a normal union of things, e.g.,
+    /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
+    pub kind: ClassSet,
+}
+
+/// A character class set.
+///
+/// This type corresponds to the internal structure of a bracketed character
+/// class. That is, every bracketed character class is one of two types: a union of
+/// items (literals, ranges, other bracketed classes) or a tree of binary set
+/// operations.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSet {
+    /// An item, which can be a single literal, range, nested character class
+    /// or a union of items.
+    Item(ClassSetItem),
+    /// A single binary operation (i.e., &&, -- or ~~).
+    BinaryOp(ClassSetBinaryOp),
+}
+
+impl ClassSet {
+    /// Build a set from a union.
+    pub fn union(ast: ClassSetUnion) -> ClassSet {
+        ClassSet::Item(ClassSetItem::Union(ast))
+    }
+
+    /// Return the span of this character class set.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSet::Item(ref x) => x.span(),
+            ClassSet::BinaryOp(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this class set is empty.
+    fn is_empty(&self) -> bool {
+        match *self {
+            ClassSet::Item(ClassSetItem::Empty(_)) => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single component of a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSetItem {
+    /// An empty item.
+    ///
+    /// Note that a bracketed character class cannot contain a single empty
+    /// item. Empty items can appear when using one of the binary operators.
+    /// For example, `[&&]` is the intersection of two empty classes.
+    Empty(Span),
+    /// A single literal.
+    Literal(Literal),
+    /// A range between two literals.
+    Range(ClassSetRange),
+    /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
+    Ascii(ClassAscii),
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(Box<ClassBracketed>),
+    /// A union of items.
+    Union(ClassSetUnion),
+}
+
+impl ClassSetItem {
+    /// Return the span of this character class set item.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSetItem::Empty(ref span) => span,
+            ClassSetItem::Literal(ref x) => &x.span,
+            ClassSetItem::Range(ref x) => &x.span,
+            ClassSetItem::Ascii(ref x) => &x.span,
+            ClassSetItem::Perl(ref x) => &x.span,
+            ClassSetItem::Unicode(ref x) => &x.span,
+            ClassSetItem::Bracketed(ref x) => &x.span,
+            ClassSetItem::Union(ref x) => &x.span,
+        }
+    }
+}
+
+/// A single character class range in a set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetRange {
+    /// The span of this range.
+    pub span: Span,
+    /// The start of this range.
+    pub start: Literal,
+    /// The end of this range.
+    pub end: Literal,
+}
+
+impl ClassSetRange {
+    /// Returns true if and only if this character class range is valid.
+    ///
+    /// The only case where a range is invalid is if its start is greater than
+    /// its end.
+    pub fn is_valid(&self) -> bool {
+        self.start.c <= self.end.c
+    }
+}
+
+/// A union of items inside a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetUnion {
+    /// The span of the items in this operation. e.g., the `a-z0-9` in
+    /// `[^a-z0-9]`
+    pub span: Span,
+    /// The sequence of items that make up this union.
+    pub items: Vec<ClassSetItem>,
+}
+
+impl ClassSetUnion {
+    /// Push a new item in this union.
+    ///
+    /// The ending position of this union's span is updated to the ending
+    /// position of the span of the item given. If the union is empty, then
+    /// the starting position of this union is set to the starting position
+    /// of this item.
+    ///
+    /// In other words, if you only use this method to add items to a union
+    /// and you set the spans on each item correctly, then you should never
+    /// need to adjust the span of the union directly.
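+    ///
+    /// For instance, starting from an empty union and pushing items for `a`
+    /// (spanning offsets 0..1) and then `b` (spanning 1..2) grows the
+    /// union's span to 0..2. A sketch, where `item_a` and `item_b` are
+    /// assumed to be literal items with those spans:
+    ///
+    /// ```ignore
+    /// union.push(item_a); // union.span is now 0..1
+    /// union.push(item_b); // union.span is now 0..2
+    /// ```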
+    pub fn push(&mut self, item: ClassSetItem) {
+        if self.items.is_empty() {
+            self.span.start = item.span().start;
+        }
+        self.span.end = item.span().end;
+        self.items.push(item);
+    }
+
+    /// Return this union as a character class set item.
+    ///
+    /// If this union contains zero items, then an empty union is
+    /// returned. If this union contains exactly 1 item, then the
+    /// corresponding item is returned. Otherwise, ClassSetItem::Union is
+    /// returned.
+    pub fn into_item(mut self) -> ClassSetItem {
+        match self.items.len() {
+            0 => ClassSetItem::Empty(self.span),
+            1 => self.items.pop().unwrap(),
+            _ => ClassSetItem::Union(self),
+        }
+    }
+}
+
+/// A Unicode character class set operation.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetBinaryOp {
+    /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--[h-p]]`.
+    pub span: Span,
+    /// The type of this set operation.
+    pub kind: ClassSetBinaryOpKind,
+    /// The left hand side of the operation.
+    pub lhs: Box<ClassSet>,
+    /// The right hand side of the operation.
+    pub rhs: Box<ClassSet>,
+}
+
+/// The type of a Unicode character class set operation.
+///
+/// Note that this doesn't explicitly represent union since there is no
+/// explicit union operator. Concatenation inside a character class corresponds
+/// to the union operation.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum ClassSetBinaryOpKind {
+    /// The intersection of two sets, e.g., `\pN&&[a-z]`.
+    Intersection,
+    /// The difference of two sets, e.g., `\pN--[0-9]`.
+    Difference,
+    /// The symmetric difference of two sets. The symmetric difference is the
+    /// set of elements belonging to one but not both sets.
+    /// e.g., `[\pL~~[:ascii:]]`.
+    SymmetricDifference,
+}
+
+/// A single zero-width assertion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Assertion {
+    /// The span of this assertion.
+    pub span: Span,
+    /// The assertion kind, e.g., `\b` or `^`.
+    pub kind: AssertionKind,
+}
+
+/// An assertion kind.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum AssertionKind {
+    /// `^`
+    StartLine,
+    /// `$`
+    EndLine,
+    /// `\A`
+    StartText,
+    /// `\z`
+    EndText,
+    /// `\b`
+    WordBoundary,
+    /// `\B`
+    NotWordBoundary,
+}
+
+/// A repetition operation applied to a regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The span of this operation.
+    pub span: Span,
+    /// The actual operation.
+    pub op: RepetitionOp,
+    /// Whether this operation was applied greedily or not.
+    pub greedy: bool,
+    /// The regular expression under repetition.
+    pub ast: Box<Ast>,
+}
+
+/// The repetition operator itself.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct RepetitionOp {
+    /// The span of this operator. This includes things like `+`, `*?` and
+    /// `{m,n}`.
+    pub span: Span,
+    /// The type of operation.
+    pub kind: RepetitionKind,
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// `?`
+    ZeroOrOne,
+    /// `*`
+    ZeroOrMore,
+    /// `+`
+    OneOrMore,
+    /// `{m,n}`
+    Range(RepetitionRange),
+}
+
+/// A range repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// `{m}`
+    Exactly(u32),
+    /// `{m,}`
+    AtLeast(u32),
+    /// `{m,n}`
+    Bounded(u32, u32),
+}
+
+impl RepetitionRange {
+    /// Returns true if and only if this repetition range is valid.
+    ///
+    /// The only case where a repetition range is invalid is if it is bounded
+    /// and its start is greater than its end.
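+    ///
+    /// A rough sketch of the rule above (illustrative only):
+    ///
+    /// ```ignore
+    /// assert!(RepetitionRange::Bounded(2, 4).is_valid());  // `{2,4}`
+    /// assert!(!RepetitionRange::Bounded(4, 2).is_valid()); // `{4,2}` has start > end
+    /// assert!(RepetitionRange::AtLeast(3).is_valid());     // `{3,}`
+    /// ```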
+    pub fn is_valid(&self) -> bool {
+        match *self {
+            RepetitionRange::Bounded(s, e) if s > e => false,
+            _ => true,
+        }
+    }
+}
+
+/// A grouped regular expression.
+///
+/// This includes both capturing and non-capturing groups. This does **not**
+/// include flag-only groups like `(?is)`, but does contain any group that
+/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
+/// `(?is:a)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The span of this group.
+    pub span: Span,
+    /// The kind of this group.
+    pub kind: GroupKind,
+    /// The regular expression in this group.
+    pub ast: Box<Ast>,
+}
+
+impl Group {
+    /// If this group is non-capturing, then this returns the (possibly empty)
+    /// set of flags. Otherwise, `None` is returned.
+    pub fn flags(&self) -> Option<&Flags> {
+        match self.kind {
+            GroupKind::NonCapturing(ref flags) => Some(flags),
+            _ => None,
+        }
+    }
+
+    /// Returns true if and only if this group is capturing.
+    pub fn is_capturing(&self) -> bool {
+        match self.kind {
+            GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
+            GroupKind::NonCapturing(_) => false,
+        }
+    }
+
+    /// Returns the capture index of this group, if this is a capturing group.
+    ///
+    /// This returns a capture index precisely when `is_capturing` is `true`.
+    pub fn capture_index(&self) -> Option<u32> {
+        match self.kind {
+            GroupKind::CaptureIndex(i) => Some(i),
+            GroupKind::CaptureName(ref x) => Some(x.index),
+            GroupKind::NonCapturing(_) => None,
+        }
+    }
+}
+
+/// The kind of a group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// `(a)`
+    CaptureIndex(u32),
+    /// `(?P<name>a)`
+    CaptureName(CaptureName),
+    /// `(?:a)` and `(?i:a)`
+    NonCapturing(Flags),
+}
+
+/// A capture name.
+///
+/// This corresponds to the name itself between the angle brackets in, e.g.,
+/// `(?P<foo>expr)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct CaptureName {
+    /// The span of this capture name.
+    pub span: Span,
+    /// The capture name.
+    pub name: String,
+    /// The capture index.
+    pub index: u32,
+}
+
+/// A group of flags that is not applied to a particular regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SetFlags {
+    /// The span of these flags, including the grouping parentheses.
+    pub span: Span,
+    /// The actual sequence of flags.
+    pub flags: Flags,
+}
+
+/// A group of flags.
+///
+/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Flags {
+    /// The span of this group of flags.
+    pub span: Span,
+    /// A sequence of flag items. Each item is either a flag or a negation
+    /// operator.
+    pub items: Vec<FlagsItem>,
+}
+
+impl Flags {
+    /// Add the given item to this sequence of flags.
+    ///
+    /// If the item was added successfully, then `None` is returned. If the
+    /// given item is a duplicate, then `Some(i)` is returned, where
+    /// `items[i].kind == item.kind`.
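+    ///
+    /// A minimal sketch of the duplicate check (illustrative only; spans are
+    /// dummy values):
+    ///
+    /// ```ignore
+    /// let span = Span::splat(Position::new(0, 0, 0));
+    /// let mut flags = Flags { span: span, items: vec![] };
+    /// let item = FlagsItem { span: span, kind: FlagsItemKind::Flag(Flag::Unicode) };
+    /// assert_eq!(flags.add_item(item.clone()), None); // added as items[0]
+    /// assert_eq!(flags.add_item(item), Some(0));      // duplicate of items[0]
+    /// ```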
+    pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
+        for (i, x) in self.items.iter().enumerate() {
+            if x.kind == item.kind {
+                return Some(i);
+            }
+        }
+        self.items.push(item);
+        None
+    }
+
+    /// Returns the state of the given flag in this set.
+    ///
+    /// If the given flag is in the set but is negated, then `Some(false)` is
+    /// returned.
+    ///
+    /// If the given flag is in the set and is not negated, then `Some(true)`
+    /// is returned.
+    ///
+    /// Otherwise, `None` is returned.
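+    ///
+    /// For example, assuming `flags` holds the parsed flag group `i-u`
+    /// (a sketch, not a doc test):
+    ///
+    /// ```ignore
+    /// assert_eq!(flags.flag_state(Flag::CaseInsensitive), Some(true));
+    /// assert_eq!(flags.flag_state(Flag::Unicode), Some(false));
+    /// assert_eq!(flags.flag_state(Flag::MultiLine), None);
+    /// ```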
+    pub fn flag_state(&self, flag: Flag) -> Option<bool> {
+        let mut negated = false;
+        for x in &self.items {
+            match x.kind {
+                FlagsItemKind::Negation => {
+                    negated = true;
+                }
+                FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
+                    return Some(!negated);
+                }
+                _ => {}
+            }
+        }
+        None
+    }
+}
+
+/// A single item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FlagsItem {
+    /// The span of this item.
+    pub span: Span,
+    /// The kind of this item.
+    pub kind: FlagsItemKind,
+}
+
+/// The kind of an item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum FlagsItemKind {
+    /// A negation operator applied to all subsequent flags in the enclosing
+    /// group.
+    Negation,
+    /// A single flag in a group.
+    Flag(Flag),
+}
+
+impl FlagsItemKind {
+    /// Returns true if and only if this item is a negation operator.
+    pub fn is_negation(&self) -> bool {
+        match *self {
+            FlagsItemKind::Negation => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single flag.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Flag {
+    /// `i`
+    CaseInsensitive,
+    /// `m`
+    MultiLine,
+    /// `s`
+    DotMatchesNewLine,
+    /// `U`
+    SwapGreed,
+    /// `u`
+    Unicode,
+    /// `x`
+    IgnoreWhitespace,
+}
+
+/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
+/// space but heap space proportional to the depth of the `Ast`.
+impl Drop for Ast {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            // Classes are recursive, so they get their own Drop impl.
+            | Ast::Class(_) => return,
+            Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Group(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Alternation(ref x) if x.asts.is_empty() => return,
+            Ast::Concat(ref x) if x.asts.is_empty() => return,
+            _ => {}
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_ast = || Ast::Empty(empty_span());
+        let mut stack = vec![mem::replace(self, empty_ast())];
+        while let Some(mut ast) = stack.pop() {
+            match ast {
+                Ast::Empty(_)
+                | Ast::Flags(_)
+                | Ast::Literal(_)
+                | Ast::Dot(_)
+                | Ast::Assertion(_)
+                // Classes are recursive, so they get their own Drop impl.
+                | Ast::Class(_) => {}
+                Ast::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Alternation(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+                Ast::Concat(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
+/// stack space but heap space proportional to the depth of the `ClassSet`.
+impl Drop for ClassSet {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            ClassSet::Item(ref item) => match *item {
+                ClassSetItem::Empty(_)
+                | ClassSetItem::Literal(_)
+                | ClassSetItem::Range(_)
+                | ClassSetItem::Ascii(_)
+                | ClassSetItem::Unicode(_)
+                | ClassSetItem::Perl(_) => return,
+                ClassSetItem::Bracketed(ref x) => {
+                    if x.kind.is_empty() {
+                        return;
+                    }
+                }
+                ClassSetItem::Union(ref x) => {
+                    if x.items.is_empty() {
+                        return;
+                    }
+                }
+            },
+            ClassSet::BinaryOp(ref op) => {
+                if op.lhs.is_empty() && op.rhs.is_empty() {
+                    return;
+                }
+            }
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
+        let mut stack = vec![mem::replace(self, empty_set())];
+        while let Some(mut set) = stack.pop() {
+            match set {
+                ClassSet::Item(ref mut item) => match *item {
+                    ClassSetItem::Empty(_)
+                    | ClassSetItem::Literal(_)
+                    | ClassSetItem::Range(_)
+                    | ClassSetItem::Ascii(_)
+                    | ClassSetItem::Unicode(_)
+                    | ClassSetItem::Perl(_) => {}
+                    ClassSetItem::Bracketed(ref mut x) => {
+                        stack.push(mem::replace(&mut x.kind, empty_set()));
+                    }
+                    ClassSetItem::Union(ref mut x) => {
+                        stack.extend(x.items.drain(..).map(ClassSet::Item));
+                    }
+                },
+                ClassSet::BinaryOp(ref mut op) => {
+                    stack.push(mem::replace(&mut op.lhs, empty_set()));
+                    stack.push(mem::replace(&mut op.rhs, empty_set()));
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Ast can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let span = || Span::splat(Position::new(0, 0, 0));
+            let mut ast = Ast::Empty(span());
+            for i in 0..200 {
+                ast = Ast::Group(Group {
+                    span: span(),
+                    kind: GroupKind::CaptureIndex(i),
+                    ast: Box::new(ast),
+                });
+            }
+            assert!(!ast.is_empty());
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1 << 10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+}
diff --git a/src/ast/parse.rs b/src/ast/parse.rs
new file mode 100644
index 0000000..f5b4548
--- /dev/null
+++ b/src/ast/parse.rs
@@ -0,0 +1,5901 @@
+/*!
+This module provides a regular expression parser.
+*/
+
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::mem;
+use std::result;
+
+use ast::{self, Ast, Position, Span};
+use either::Either;
+
+use is_meta_character;
+
+type Result<T> = result::Result<T, ast::Error>;
+
+/// A primitive is an expression with no sub-expressions. This includes
+/// literals, assertions and non-set character classes. This representation
+/// is used as intermediate state in the parser.
+///
+/// This does not include ASCII character classes, since they can only appear
+/// within a set character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Primitive {
+    Literal(ast::Literal),
+    Assertion(ast::Assertion),
+    Dot(Span),
+    Perl(ast::ClassPerl),
+    Unicode(ast::ClassUnicode),
+}
+
+impl Primitive {
+    /// Return the span of this primitive.
+    fn span(&self) -> &Span {
+        match *self {
+            Primitive::Literal(ref x) => &x.span,
+            Primitive::Assertion(ref x) => &x.span,
+            Primitive::Dot(ref span) => span,
+            Primitive::Perl(ref x) => &x.span,
+            Primitive::Unicode(ref x) => &x.span,
+        }
+    }
+
+    /// Convert this primitive into a proper AST.
+    fn into_ast(self) -> Ast {
+        match self {
+            Primitive::Literal(lit) => Ast::Literal(lit),
+            Primitive::Assertion(assert) => Ast::Assertion(assert),
+            Primitive::Dot(span) => Ast::Dot(span),
+            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
+            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
+        }
+    }
+
+    /// Convert this primitive into an item in a character class.
+    ///
+    /// If this primitive is not a legal class item (i.e., it is an assertion
+    /// or a dot), then return an error.
+    fn into_class_set_item<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::ClassSetItem> {
+        use self::Primitive::*;
+        use ast::ClassSetItem;
+
+        match self {
+            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
+            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
+            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
+        }
+    }
+
+    /// Convert this primitive into a literal in a character class. In
+    /// particular, literals are the only valid items that can appear in
+    /// ranges.
+    ///
+    /// If this primitive is not a legal literal (i.e., it is a class, an
+    /// assertion or a dot), then return an error.
+    fn into_class_literal<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::Literal> {
+        use self::Primitive::*;
+
+        match self {
+            Literal(lit) => Ok(lit),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
+        }
+    }
+}
+
+/// Returns true if the given character is a hexadecimal digit.
+fn is_hex(c: char) -> bool {
+    ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+}
+
+/// Returns true if the given character is valid in a capture group name.
+///
+/// If `first` is true, then `c` is treated as the first character in the
+/// group name (which is not allowed to be a digit).
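+///
+/// For example (a sketch of the intended behavior):
+///
+/// ```ignore
+/// assert!(is_capture_char('a', true));
+/// assert!(!is_capture_char('1', true)); // a digit may not start a name
+/// assert!(is_capture_char('1', false));
+/// ```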
+fn is_capture_char(c: char, first: bool) -> bool {
+    c == '_'
+        || (!first && c >= '0' && c <= '9')
+        || (c >= 'a' && c <= 'z')
+        || (c >= 'A' && c <= 'Z')
+}
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
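+///
+/// A minimal usage sketch (illustrative only; the methods used are defined
+/// below):
+///
+/// ```ignore
+/// let mut parser = ParserBuilder::new()
+///     .octal(true)
+///     .ignore_whitespace(true)
+///     .build();
+/// let ast = parser.parse(r"\141  # octal escape for 'a'").unwrap();
+/// ```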
+#[derive(Clone, Debug)]
+pub struct ParserBuilder {
+    ignore_whitespace: bool,
+    nest_limit: u32,
+    octal: bool,
+}
+
+impl Default for ParserBuilder {
+    fn default() -> ParserBuilder {
+        ParserBuilder::new()
+    }
+}
+
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder {
+            ignore_whitespace: false,
+            nest_limit: 250,
+            octal: false,
+        }
+    }
+
+    /// Build a parser from this configuration with the given pattern.
+    pub fn build(&self) -> Parser {
+        Parser {
+            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
+            capture_index: Cell::new(0),
+            nest_limit: self.nest_limit,
+            octal: self.octal,
+            initial_ignore_whitespace: self.ignore_whitespace,
+            ignore_whitespace: Cell::new(self.ignore_whitespace),
+            comments: RefCell::new(vec![]),
+            stack_group: RefCell::new(vec![]),
+            stack_class: RefCell::new(vec![]),
+            capture_names: RefCell::new(vec![]),
+            scratch: RefCell::new(String::new()),
+        }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax, therefore, it should not be
+    /// used in a granular way.
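+    ///
+    /// A sketch of the `0` nest limit case mentioned above (illustrative
+    /// only):
+    ///
+    /// ```ignore
+    /// let mut builder = ParserBuilder::new();
+    /// builder.nest_limit(0);
+    /// assert!(builder.build().parse("a").is_ok());   // no nesting required
+    /// assert!(builder.build().parse("ab").is_err()); // concatenation => depth 1
+    /// ```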
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.nest_limit = limit;
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE-based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
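+    ///
+    /// A brief sketch of the difference (illustrative only):
+    ///
+    /// ```ignore
+    /// let mut builder = ParserBuilder::new();
+    /// assert!(builder.build().parse(r"\141").is_err()); // octal is off by default
+    /// builder.octal(true);
+    /// assert!(builder.build().parse(r"\141").is_ok());  // now equivalent to `a`
+    /// ```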
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.octal = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
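+    ///
+    /// For example, under this setting the following two patterns parse to
+    /// the same concatenation of literals (a sketch, not a doc test):
+    ///
+    /// ```ignore
+    /// let mut builder = ParserBuilder::new();
+    /// builder.ignore_whitespace(true);
+    /// let a = builder.build().parse("a b  # comment").unwrap();
+    /// let b = builder.build().parse("ab").unwrap();
+    /// // Both are the concatenation of `a` and `b`, though their spans differ.
+    /// ```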
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ignore_whitespace = yes;
+        self
+    }
+}
+
+/// A regular expression parser.
+///
+/// This parses a string representation of a regular expression into an
+/// abstract syntax tree. The size of the tree is proportional to the length
+/// of the regular expression pattern.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    /// The current position of the parser.
+    pos: Cell<Position>,
+    /// The current capture index.
+    capture_index: Cell<u32>,
+    /// The maximum number of open parens/brackets allowed. If the parser
+    /// exceeds this number, then an error is returned.
+    nest_limit: u32,
+    /// Whether to support octal syntax or not. When `false`, the parser will
+    /// return an error helpfully pointing out that backreferences are not
+    /// supported.
+    octal: bool,
+    /// The initial setting for `ignore_whitespace` as provided by
+    /// `ParserBuilder`. This is used when resetting the parser's state.
+    initial_ignore_whitespace: bool,
+    /// Whether whitespace should be ignored. When enabled, comments are
+    /// also permitted.
+    ignore_whitespace: Cell<bool>,
+    /// A list of comments, in order of appearance.
+    comments: RefCell<Vec<ast::Comment>>,
+    /// A stack of grouped sub-expressions, including alternations.
+    stack_group: RefCell<Vec<GroupState>>,
+    /// A stack of nested character classes. This is only non-empty when
+    /// parsing a class.
+    stack_class: RefCell<Vec<ClassState>>,
+    /// A sorted sequence of capture names. This is used to detect duplicate
+    /// capture names and report an error if one is detected.
+    capture_names: RefCell<Vec<ast::CaptureName>>,
+    /// A scratch buffer used in various places. Mostly this is used to
+    /// accumulate relevant characters from parts of a pattern.
+    scratch: RefCell<String>,
+}
+
+/// ParserI is the internal parser implementation.
+///
+/// We use this separate type so that we can carry the provided pattern string
+/// along with us. In particular, a `Parser`'s internal state is not tied to any
+/// one pattern, but `ParserI` is.
+///
+/// This type also lets us use `ParserI<&Parser>` in production code while
+/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
+/// work against the internal interface of the parser.
+#[derive(Clone, Debug)]
+struct ParserI<'s, P> {
+    /// The parser state/configuration.
+    parser: P,
+    /// The full regular expression provided by the user.
+    pattern: &'s str,
+}
+
+/// GroupState represents a single stack frame while parsing nested groups
+/// and alternations. Each frame records the state up to an opening parenthesis
+/// or an alternation bar `|`.
+#[derive(Clone, Debug)]
+enum GroupState {
+    /// This state is pushed whenever an opening group is found.
+    Group {
+        /// The concatenation immediately preceding the opening group.
+        concat: ast::Concat,
+        /// The group that has been opened. Its sub-AST is always empty.
+        group: ast::Group,
+        /// Whether this group has the `x` flag enabled or not.
+        ignore_whitespace: bool,
+    },
+    /// This state is pushed whenever a new alternation branch is found. If
+    /// an alternation branch is found and this state is at the top of the
+    /// stack, then this state should be modified to include the new
+    /// alternation.
+    Alternation(ast::Alternation),
+}
+
+/// ClassState represents a single stack frame while parsing character classes.
+/// Each frame records the state up to an intersection, difference, symmetric
+/// difference or nested class.
+///
+/// Note that a parser's character class stack is only non-empty when parsing
+/// a character class. In all other cases, it is empty.
+#[derive(Clone, Debug)]
+enum ClassState {
+    /// This state is pushed whenever an opening bracket is found.
+    Open {
+        /// The union of class items immediately preceding this class.
+        union: ast::ClassSetUnion,
+        /// The class that has been opened. Typically this just corresponds
+        /// to the `[`, but it can also include `[^` since `^` indicates
+        /// negation of the class.
+        set: ast::ClassBracketed,
+    },
+    /// This state is pushed when an operator is seen. When popped, the stored
+    /// set becomes the left hand side of the operator.
+    Op {
+        /// The type of the operation, i.e., &&, -- or ~~.
+        kind: ast::ClassSetBinaryOpKind,
+        /// The left-hand side of the operator.
+        lhs: ast::ClassSet,
+    },
+}
+
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with either the `parse` or `parse_with_comments`
+    /// methods. The parse methods return an abstract syntax tree.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into an abstract syntax tree.
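+    ///
+    /// A minimal usage sketch (illustrative only):
+    ///
+    /// ```ignore
+    /// use regex_syntax::ast::parse::Parser;
+    ///
+    /// let ast = Parser::new().parse("a|b").unwrap();
+    /// // `a|b` parses to an `Ast::Alternation` with two literal branches.
+    /// ```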
+    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
+        ParserI::new(self, pattern).parse()
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    pub fn parse_with_comments(
+        &mut self,
+        pattern: &str,
+    ) -> Result<ast::WithComments> {
+        ParserI::new(self, pattern).parse_with_comments()
+    }
+
+    /// Reset the internal state of a parser.
+    ///
+    /// This is called at the beginning of every parse. This prevents the
+    /// parser from running with inconsistent state (say, if a previous
+    /// invocation returned an error and the parser is reused).
+    fn reset(&self) {
+        // These settings should be in line with the construction
+        // in `ParserBuilder::build`.
+        self.pos.set(Position { offset: 0, line: 1, column: 1 });
+        self.ignore_whitespace.set(self.initial_ignore_whitespace);
+        self.comments.borrow_mut().clear();
+        self.stack_group.borrow_mut().clear();
+        self.stack_class.borrow_mut().clear();
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Build an internal parser from a parser configuration and a pattern.
+    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
+        ParserI { parser: parser, pattern: pattern }
+    }
+
+    /// Return a reference to the parser state.
+    fn parser(&self) -> &Parser {
+        self.parser.borrow()
+    }
+
+    /// Return a reference to the pattern being parsed.
+    fn pattern(&self) -> &str {
+        self.pattern.borrow()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
+        ast::Error {
+            kind: kind,
+            pattern: self.pattern().to_string(),
+            span: span,
+        }
+    }
+
+    /// Return the current offset of the parser.
+    ///
+    /// The offset starts at `0` from the beginning of the regular expression
+    /// pattern string.
+    fn offset(&self) -> usize {
+        self.parser().pos.get().offset
+    }
+
+    /// Return the current line number of the parser.
+    ///
+    /// The line number starts at `1`.
+    fn line(&self) -> usize {
+        self.parser().pos.get().line
+    }
+
+    /// Return the current column of the parser.
+    ///
+    /// The column number starts at `1` and is reset whenever a `\n` is seen.
+    fn column(&self) -> usize {
+        self.parser().pos.get().column
+    }
+
+    /// Return the next capturing index. Each subsequent call increments the
+    /// internal index.
+    ///
+    /// The span given should correspond to the location of the opening
+    /// parenthesis.
+    ///
+    /// If the capture limit is exceeded, then an error is returned.
+    fn next_capture_index(&self, span: Span) -> Result<u32> {
+        let current = self.parser().capture_index.get();
+        let i = current.checked_add(1).ok_or_else(|| {
+            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
+        })?;
+        self.parser().capture_index.set(i);
+        Ok(i)
+    }
+
+    /// Adds the given capture name to this parser. If this capture name has
+    /// already been used, then an error is returned.
+    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
+        let mut names = self.parser().capture_names.borrow_mut();
+        match names
+            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
+        {
+            Err(i) => {
+                names.insert(i, cap.clone());
+                Ok(())
+            }
+            Ok(i) => Err(self.error(
+                cap.span,
+                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
+            )),
+        }
+    }
+
+    /// Return whether the parser should ignore whitespace or not.
+    fn ignore_whitespace(&self) -> bool {
+        self.parser().ignore_whitespace.get()
+    }
+
+    /// Return the character at the current position of the parser.
+    ///
+    /// This panics if the current position does not point to a valid char.
+    fn char(&self) -> char {
+        self.char_at(self.offset())
+    }
+
+    /// Return the character at the given position.
+    ///
+    /// This panics if the given position does not point to a valid char.
+    fn char_at(&self, i: usize) -> char {
+        self.pattern()[i..]
+            .chars()
+            .next()
+            .unwrap_or_else(|| panic!("expected char at offset {}", i))
+    }
+
+    /// Bump the parser to the next Unicode scalar value.
+    ///
+    /// If the end of the input has been reached, then `false` is returned.
+    fn bump(&self) -> bool {
+        if self.is_eof() {
+            return false;
+        }
+        let Position { mut offset, mut line, mut column } = self.pos();
+        if self.char() == '\n' {
+            line = line.checked_add(1).unwrap();
+            column = 1;
+        } else {
+            column = column.checked_add(1).unwrap();
+        }
+        offset += self.char().len_utf8();
+        self.parser().pos.set(Position {
+            offset: offset,
+            line: line,
+            column: column,
+        });
+        self.pattern()[self.offset()..].chars().next().is_some()
+    }
+
+    /// If the substring starting at the current position of the parser has
+    /// the given prefix, then bump the parser to the character immediately
+    /// following the prefix and return true. Otherwise, don't bump the parser
+    /// and return false.
+    fn bump_if(&self, prefix: &str) -> bool {
+        if self.pattern()[self.offset()..].starts_with(prefix) {
+            for _ in 0..prefix.chars().count() {
+                self.bump();
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns true if and only if the parser is positioned at a look-around
+    /// prefix. The conditions under which this returns true must always
+    /// correspond to a regular expression that would otherwise be considered
+    /// invalid.
+    ///
+    /// This should only be called immediately after parsing the opening of
+    /// a group or a set of flags.
+    fn is_lookaround_prefix(&self) -> bool {
+        self.bump_if("?=")
+            || self.bump_if("?!")
+            || self.bump_if("?<=")
+            || self.bump_if("?<!")
+    }
+
+    /// Bump the parser, and if the `x` flag is enabled, bump through any
+    /// subsequent spaces. Return true if and only if the parser is not at
+    /// EOF.
+    fn bump_and_bump_space(&self) -> bool {
+        if !self.bump() {
+            return false;
+        }
+        self.bump_space();
+        !self.is_eof()
+    }
+
+    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
+    /// comments), then this will advance the parser through all whitespace
+    /// and comments to the next non-whitespace non-comment byte.
+    ///
+    /// If the `x` flag is disabled, then this is a no-op.
+    ///
+    /// This should be used selectively throughout the parser where
+    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
+    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
+    fn bump_space(&self) {
+        if !self.ignore_whitespace() {
+            return;
+        }
+        while !self.is_eof() {
+            if self.char().is_whitespace() {
+                self.bump();
+            } else if self.char() == '#' {
+                let start = self.pos();
+                let mut comment_text = String::new();
+                self.bump();
+                while !self.is_eof() {
+                    let c = self.char();
+                    self.bump();
+                    if c == '\n' {
+                        break;
+                    }
+                    comment_text.push(c);
+                }
+                let comment = ast::Comment {
+                    span: Span::new(start, self.pos()),
+                    comment: comment_text,
+                };
+                self.parser().comments.borrow_mut().push(comment);
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Peek at the next character in the input without advancing the parser.
+    ///
+    /// If the input has been exhausted, then this returns `None`.
+    fn peek(&self) -> Option<char> {
+        if self.is_eof() {
+            return None;
+        }
+        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
+    }
+
+    /// Like peek, but will ignore spaces when the parser is in whitespace
+    /// insensitive mode.
+    fn peek_space(&self) -> Option<char> {
+        if !self.ignore_whitespace() {
+            return self.peek();
+        }
+        if self.is_eof() {
+            return None;
+        }
+        let mut start = self.offset() + self.char().len_utf8();
+        let mut in_comment = false;
+        for (i, c) in self.pattern()[start..].char_indices() {
+            if c.is_whitespace() {
+                continue;
+            } else if !in_comment && c == '#' {
+                in_comment = true;
+            } else if in_comment && c == '\n' {
+                in_comment = false;
+            } else {
+                start += i;
+                break;
+            }
+        }
+        self.pattern()[start..].chars().next()
+    }
+
+    /// Returns true if the next call to `bump` would return false.
+    fn is_eof(&self) -> bool {
+        self.offset() == self.pattern().len()
+    }
+
+    /// Return the current position of the parser, which includes the offset,
+    /// line and column.
+    fn pos(&self) -> Position {
+        self.parser().pos.get()
+    }
+
+    /// Create a span at the current position of the parser. Both the start
+    /// and end of the span are set.
+    fn span(&self) -> Span {
+        Span::splat(self.pos())
+    }
+
+    /// Create a span that covers the current character.
+    fn span_char(&self) -> Span {
+        let mut next = Position {
+            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
+            line: self.line(),
+            column: self.column().checked_add(1).unwrap(),
+        };
+        if self.char() == '\n' {
+            next.line += 1;
+            next.column = 1;
+        }
+        Span::new(self.pos(), next)
+    }
+
+    /// Parse and push a single alternation on to the parser's internal stack.
+    /// If the top of the stack already has an alternation, then add to that
+    /// instead of pushing a new one.
+    ///
+    /// The concatenation given corresponds to a single alternation branch.
+    /// The concatenation returned starts the next branch and is empty.
+    ///
+    /// This assumes the parser is currently positioned at `|` and will advance
+    /// the parser to the character following `|`.
+    #[inline(never)]
+    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '|');
+        concat.span.end = self.pos();
+        self.push_or_add_alternation(concat);
+        self.bump();
+        Ok(ast::Concat { span: self.span(), asts: vec![] })
+    }
+
+    /// Pushes or adds the given branch of an alternation to the parser's
+    /// internal stack of state.
+    fn push_or_add_alternation(&self, concat: ast::Concat) {
+        use self::GroupState::*;
+
+        let mut stack = self.parser().stack_group.borrow_mut();
+        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
+            alts.asts.push(concat.into_ast());
+            return;
+        }
+        stack.push(Alternation(ast::Alternation {
+            span: Span::new(concat.span.start, self.pos()),
+            asts: vec![concat.into_ast()],
+        }));
+    }
+
+    /// Parse and push a group AST (and its parent concatenation) on to the
+    /// parser's internal stack. Return a fresh concatenation corresponding
+    /// to the group's sub-AST.
+    ///
+    /// If a set of flags was found (with no group), then the concatenation
+    /// is returned with that set of flags added.
+    ///
+    /// This assumes that the parser is currently positioned on the opening
+    /// parenthesis. It advances the parser to the character at the start
+    /// of the sub-expression (or adjoining expression).
+    ///
+    /// If there was a problem parsing the start of the group, then an error
+    /// is returned.
+    #[inline(never)]
+    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '(');
+        match self.parse_group()? {
+            Either::Left(set) => {
+                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
+                if let Some(v) = ignore {
+                    self.parser().ignore_whitespace.set(v);
+                }
+
+                concat.asts.push(Ast::Flags(set));
+                Ok(concat)
+            }
+            Either::Right(group) => {
+                let old_ignore_whitespace = self.ignore_whitespace();
+                let new_ignore_whitespace = group
+                    .flags()
+                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
+                    .unwrap_or(old_ignore_whitespace);
+                self.parser().stack_group.borrow_mut().push(
+                    GroupState::Group {
+                        concat: concat,
+                        group: group,
+                        ignore_whitespace: old_ignore_whitespace,
+                    },
+                );
+                self.parser().ignore_whitespace.set(new_ignore_whitespace);
+                Ok(ast::Concat { span: self.span(), asts: vec![] })
+            }
+        }
+    }
+
+    /// Pop a group AST from the parser's internal stack and set the group's
+    /// AST to the given concatenation. Return the concatenation containing
+    /// the group.
+    ///
+    /// This assumes that the parser is currently positioned on the closing
+    /// parenthesis and advances the parser to the character following the `)`.
+    ///
+    /// If no such group could be popped, then an unopened group error is
+    /// returned.
+    #[inline(never)]
+    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
+        use self::GroupState::*;
+
+        assert_eq!(self.char(), ')');
+        let mut stack = self.parser().stack_group.borrow_mut();
+        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
+            .pop()
+        {
+            Some(Group { concat, group, ignore_whitespace }) => {
+                (concat, group, ignore_whitespace, None)
+            }
+            Some(Alternation(alt)) => match stack.pop() {
+                Some(Group { concat, group, ignore_whitespace }) => {
+                    (concat, group, ignore_whitespace, Some(alt))
+                }
+                None | Some(Alternation(_)) => {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::GroupUnopened,
+                    ));
+                }
+            },
+            None => {
+                return Err(self
+                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
+            }
+        };
+        self.parser().ignore_whitespace.set(ignore_whitespace);
+        group_concat.span.end = self.pos();
+        self.bump();
+        group.span.end = self.pos();
+        match alt {
+            Some(mut alt) => {
+                alt.span.end = group_concat.span.end;
+                alt.asts.push(group_concat.into_ast());
+                group.ast = Box::new(alt.into_ast());
+            }
+            None => {
+                group.ast = Box::new(group_concat.into_ast());
+            }
+        }
+        prior_concat.asts.push(Ast::Group(group));
+        Ok(prior_concat)
+    }
+
+    /// Pop the last state from the parser's internal stack, if it exists, and
+    /// add the given concatenation to it. There either must be no state or a
+    /// single alternation item on the stack. Any other scenario produces an
+    /// error.
+    ///
+    /// This assumes that the parser has advanced to the end.
+    #[inline(never)]
+    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
+        concat.span.end = self.pos();
+        let mut stack = self.parser().stack_group.borrow_mut();
+        let ast = match stack.pop() {
+            None => Ok(concat.into_ast()),
+            Some(GroupState::Alternation(mut alt)) => {
+                alt.span.end = self.pos();
+                alt.asts.push(concat.into_ast());
+                Ok(Ast::Alternation(alt))
+            }
+            Some(GroupState::Group { group, .. }) => {
+                return Err(
+                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
+                );
+            }
+        };
+        // If we try to pop again, there should be nothing.
+        match stack.pop() {
+            None => ast,
+            Some(GroupState::Alternation(_)) => {
+                // This unreachable is unfortunate. This case can't happen
+                // because the only way we can be here is if there were two
+                // `GroupState::Alternation`s adjacent in the parser's stack,
+                // which we guarantee to never happen because we never push a
+                // `GroupState::Alternation` if one is already at the top of
+                // the stack.
+                unreachable!()
+            }
+            Some(GroupState::Group { group, .. }) => {
+                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
+            }
+        }
+    }
+
+    /// Parse the opening of a character class and push the current class
+    /// parsing context onto the parser's stack. This assumes that the parser
+    /// is positioned at an opening `[`. The given union should correspond to
+    /// the union of set items built up before seeing the `[`.
+    ///
+    /// If there was a problem parsing the opening of the class, then an error
+    /// is returned. Otherwise, a new union of set items for the class is
+    /// returned (which may be populated with either a `]` or a `-`).
+    #[inline(never)]
+    fn push_class_open(
+        &self,
+        parent_union: ast::ClassSetUnion,
+    ) -> Result<ast::ClassSetUnion> {
+        assert_eq!(self.char(), '[');
+
+        let (nested_set, nested_union) = self.parse_set_class_open()?;
+        self.parser()
+            .stack_class
+            .borrow_mut()
+            .push(ClassState::Open { union: parent_union, set: nested_set });
+        Ok(nested_union)
+    }
+
+    /// Parse the end of a character class set and pop the character class
+    /// parser stack. The union given corresponds to the last union built
+    /// before seeing the closing `]`. The union returned corresponds to the
+    /// parent character class set with the nested class added to it.
+    ///
+    /// This assumes that the parser is positioned at a `]` and will advance
+    /// the parser to the byte immediately following the `]`.
+    ///
+    /// If the stack is empty after popping, then this returns the final
+    /// "top-level" character class AST (where a "top-level" character class
+    /// is one that is not nested inside any other character class).
+    ///
+    /// If there is no corresponding opening bracket on the parser's stack,
+    /// then an error is returned.
+    #[inline(never)]
+    fn pop_class(
+        &self,
+        nested_union: ast::ClassSetUnion,
+    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+        assert_eq!(self.char(), ']');
+
+        let item = ast::ClassSet::Item(nested_union.into_item());
+        let prevset = self.pop_class_op(item);
+        let mut stack = self.parser().stack_class.borrow_mut();
+        match stack.pop() {
+            None => {
+                // We can never observe an empty stack:
+                //
+                // 1) We are guaranteed to start with a non-empty stack since
+                //    the character class parser is only initiated when it sees
+                //    a `[`.
+                // 2) If we ever observe an empty stack while popping after
+                //    seeing a `]`, then we signal the character class parser
+                //    to terminate.
+                panic!("unexpected empty character class stack")
+            }
+            Some(ClassState::Op { .. }) => {
+                // This panic is unfortunate, but this case is impossible
+                // since we already popped the Op state if one exists above.
+                // Namely, every push to the class parser stack is guarded by
+                // whether an existing Op is already on the top of the stack.
+                // If it is, the existing Op is modified. That is, the stack
+                // can never have consecutive Op states.
+                panic!("unexpected ClassState::Op")
+            }
+            Some(ClassState::Open { mut union, mut set }) => {
+                self.bump();
+                set.span.end = self.pos();
+                set.kind = prevset;
+                if stack.is_empty() {
+                    Ok(Either::Right(ast::Class::Bracketed(set)))
+                } else {
+                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
+                    Ok(Either::Left(union))
+                }
+            }
+        }
+    }
+
+    /// Return an "unclosed class" error whose span points to the most
+    /// recently opened class.
+    ///
+    /// This should only be called while parsing a character class.
+    #[inline(never)]
+    fn unclosed_class_error(&self) -> ast::Error {
+        for state in self.parser().stack_class.borrow().iter().rev() {
+            match *state {
+                ClassState::Open { ref set, .. } => {
+                    return self
+                        .error(set.span, ast::ErrorKind::ClassUnclosed);
+                }
+                _ => {}
+            }
+        }
+        // We are guaranteed to have a non-empty stack with at least
+        // one open bracket, so we should never get here.
+        panic!("no open character class found")
+    }
+
+    /// Push the current set of class items on to the class parser's stack as
+    /// the left hand side of the given operator.
+    ///
+    /// A fresh set union is returned, which should be used to build the right
+    /// hand side of this operator.
+    #[inline(never)]
+    fn push_class_op(
+        &self,
+        next_kind: ast::ClassSetBinaryOpKind,
+        next_union: ast::ClassSetUnion,
+    ) -> ast::ClassSetUnion {
+        let item = ast::ClassSet::Item(next_union.into_item());
+        let new_lhs = self.pop_class_op(item);
+        self.parser()
+            .stack_class
+            .borrow_mut()
+            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
+        ast::ClassSetUnion { span: self.span(), items: vec![] }
+    }
+
+    /// Pop a character class set from the character class parser stack. If the
+    /// top of the stack is just an item (not an operation), then return the
+    /// given set unchanged. If the top of the stack is an operation, then the
+    /// given set will be used as the rhs of the operation on the top of the
+    /// stack. In that case, the binary operation is returned as a set.
+    #[inline(never)]
+    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
+        let mut stack = self.parser().stack_class.borrow_mut();
+        let (kind, lhs) = match stack.pop() {
+            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
+            Some(state @ ClassState::Open { .. }) => {
+                stack.push(state);
+                return rhs;
+            }
+            None => unreachable!(),
+        };
+        let span = Span::new(lhs.span().start, rhs.span().end);
+        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+            span: span,
+            kind: kind,
+            lhs: Box::new(lhs),
+            rhs: Box::new(rhs),
+        })
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Parse the regular expression into an abstract syntax tree.
+    fn parse(&self) -> Result<Ast> {
+        self.parse_with_comments().map(|astc| astc.ast)
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    fn parse_with_comments(&self) -> Result<ast::WithComments> {
+        assert_eq!(self.offset(), 0, "parser can only be used once");
+        self.parser().reset();
+        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                break;
+            }
+            match self.char() {
+                '(' => concat = self.push_group(concat)?,
+                ')' => concat = self.pop_group(concat)?,
+                '|' => concat = self.push_alternate(concat)?,
+                '[' => {
+                    let class = self.parse_set_class()?;
+                    concat.asts.push(Ast::Class(class));
+                }
+                '?' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat,
+                        ast::RepetitionKind::ZeroOrOne,
+                    )?;
+                }
+                '*' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat,
+                        ast::RepetitionKind::ZeroOrMore,
+                    )?;
+                }
+                '+' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat,
+                        ast::RepetitionKind::OneOrMore,
+                    )?;
+                }
+                '{' => {
+                    concat = self.parse_counted_repetition(concat)?;
+                }
+                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
+            }
+        }
+        let ast = self.pop_group_end(concat)?;
+        NestLimiter::new(self).check(&ast)?;
+        Ok(ast::WithComments {
+            ast: ast,
+            comments: mem::replace(
+                &mut *self.parser().comments.borrow_mut(),
+                vec![],
+            ),
+        })
+    }
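+
+    // Editor's note (illustrative sketch, not part of the upstream source):
+    // the dispatch loop above is what the public `Parser::parse` ultimately
+    // drives. Under that assumption, a caller would see something like:
+    //
+    //     let mut parser = regex_syntax::ast::parse::Parser::new();
+    //     let ast = parser.parse("a|b")?;
+    //     // `ast` should be an `Ast::Alternation` with two literal branches.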
+
+    /// Parses an uncounted repetition operation. An uncounted repetition
+    /// operator includes ?, * and +, but does not include the {m,n} syntax.
+    /// The given `kind` should correspond to the operator observed by the
+    /// caller.
+    ///
+    /// This assumes that the parser is currently positioned at the repetition
+    /// operator and advances the parser to the first character after the
+    /// operator. (Note that the operator may include a single additional `?`,
+    /// which makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    #[inline(never)]
+    fn parse_uncounted_repetition(
+        &self,
+        mut concat: ast::Concat,
+        kind: ast::RepetitionKind,
+    ) -> Result<ast::Concat> {
+        assert!(
+            self.char() == '?' || self.char() == '*' || self.char() == '+'
+        );
+        let op_start = self.pos();
+        let ast = match concat.asts.pop() {
+            Some(ast) => ast,
+            None => {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+                )
+            }
+        };
+        match ast {
+            Ast::Empty(_) | Ast::Flags(_) => {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+                )
+            }
+            _ => {}
+        }
+        let mut greedy = true;
+        if self.bump() && self.char() == '?' {
+            greedy = false;
+            self.bump();
+        }
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: ast.span().with_end(self.pos()),
+            op: ast::RepetitionOp {
+                span: Span::new(op_start, self.pos()),
+                kind: kind,
+            },
+            greedy: greedy,
+            ast: Box::new(ast),
+        }));
+        Ok(concat)
+    }
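+
+    // Illustrative sketch (editorial addition, not upstream code): given the
+    // routine above, a pattern such as `a+?` should parse into a repetition
+    // whose operator spans the `+?` and whose `greedy` flag is false:
+    //
+    //     let ast = regex_syntax::ast::parse::Parser::new().parse("a+?")?;
+    //     // expected: Ast::Repetition { op.kind: OneOrMore, greedy: false, .. }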
+
+    /// Parses a counted repetition operation. A counted repetition operator
+    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
+    /// operators.
+    ///
+    /// This assumes that the parser is currently positioned at the opening `{`
+    /// and advances the parser to the first character after the operator.
+    /// (Note that the operator may include a single additional `?`, which
+    /// makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    #[inline(never)]
+    fn parse_counted_repetition(
+        &self,
+        mut concat: ast::Concat,
+    ) -> Result<ast::Concat> {
+        assert!(self.char() == '{');
+        let start = self.pos();
+        let ast = match concat.asts.pop() {
+            Some(ast) => ast,
+            None => {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+                )
+            }
+        };
+        match ast {
+            Ast::Empty(_) | Ast::Flags(_) => {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+                )
+            }
+            _ => {}
+        }
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+        let count_start = specialize_err(
+            self.parse_decimal(),
+            ast::ErrorKind::DecimalEmpty,
+            ast::ErrorKind::RepetitionCountDecimalEmpty,
+        )?;
+        let mut range = ast::RepetitionRange::Exactly(count_start);
+        if self.is_eof() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+        if self.char() == ',' {
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::RepetitionCountUnclosed,
+                ));
+            }
+            if self.char() != '}' {
+                let count_end = specialize_err(
+                    self.parse_decimal(),
+                    ast::ErrorKind::DecimalEmpty,
+                    ast::ErrorKind::RepetitionCountDecimalEmpty,
+                )?;
+                range = ast::RepetitionRange::Bounded(count_start, count_end);
+            } else {
+                range = ast::RepetitionRange::AtLeast(count_start);
+            }
+        }
+        if self.is_eof() || self.char() != '}' {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+
+        let mut greedy = true;
+        if self.bump_and_bump_space() && self.char() == '?' {
+            greedy = false;
+            self.bump();
+        }
+
+        let op_span = Span::new(start, self.pos());
+        if !range.is_valid() {
+            return Err(
+                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
+            );
+        }
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: ast.span().with_end(self.pos()),
+            op: ast::RepetitionOp {
+                span: op_span,
+                kind: ast::RepetitionKind::Range(range),
+            },
+            greedy: greedy,
+            ast: Box::new(ast),
+        }));
+        Ok(concat)
+    }
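+
+    // Illustrative sketch (editorial, not upstream): a counted repetition
+    // such as `a{2,5}?` should produce `RepetitionKind::Range(Bounded(2, 5))`
+    // with `greedy: false`, while `a{2,}` yields `AtLeast(2)` and `a{2}`
+    // yields `Exactly(2)`:
+    //
+    //     let ast = regex_syntax::ast::parse::Parser::new().parse("a{2,5}?")?;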
+
+    /// Parse a group (which contains a sub-expression) or a set of flags.
+    ///
+    /// If a group was found, then it is returned with an empty AST. If a set
+    /// of flags is found, then that set is returned.
+    ///
+    /// The parser should be positioned at the opening parenthesis.
+    ///
+    /// This advances the parser to the character before the start of the
+    /// sub-expression (in the case of a group) or to the closing parenthesis
+    /// immediately following the set of flags.
+    ///
+    /// # Errors
+    ///
+    /// If flags are given and incorrectly specified, then a corresponding
+    /// error is returned.
+    ///
+    /// If a capture name is given and it is incorrectly specified, then a
+    /// corresponding error is returned.
+    #[inline(never)]
+    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
+        assert_eq!(self.char(), '(');
+        let open_span = self.span_char();
+        self.bump();
+        self.bump_space();
+        if self.is_lookaround_prefix() {
+            return Err(self.error(
+                Span::new(open_span.start, self.span().end),
+                ast::ErrorKind::UnsupportedLookAround,
+            ));
+        }
+        let inner_span = self.span();
+        if self.bump_if("?P<") {
+            let capture_index = self.next_capture_index(open_span)?;
+            let cap = self.parse_capture_name(capture_index)?;
+            Ok(Either::Right(ast::Group {
+                span: open_span,
+                kind: ast::GroupKind::CaptureName(cap),
+                ast: Box::new(Ast::Empty(self.span())),
+            }))
+        } else if self.bump_if("?") {
+            if self.is_eof() {
+                return Err(
+                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
+                );
+            }
+            let flags = self.parse_flags()?;
+            let char_end = self.char();
+            self.bump();
+            if char_end == ')' {
+                // We don't allow empty flags, e.g., `(?)`. We instead
+                // interpret it as a repetition operator missing its argument.
+                if flags.items.is_empty() {
+                    return Err(self.error(
+                        inner_span,
+                        ast::ErrorKind::RepetitionMissing,
+                    ));
+                }
+                Ok(Either::Left(ast::SetFlags {
+                    span: Span { end: self.pos(), ..open_span },
+                    flags: flags,
+                }))
+            } else {
+                assert_eq!(char_end, ':');
+                Ok(Either::Right(ast::Group {
+                    span: open_span,
+                    kind: ast::GroupKind::NonCapturing(flags),
+                    ast: Box::new(Ast::Empty(self.span())),
+                }))
+            }
+        } else {
+            let capture_index = self.next_capture_index(open_span)?;
+            Ok(Either::Right(ast::Group {
+                span: open_span,
+                kind: ast::GroupKind::CaptureIndex(capture_index),
+                ast: Box::new(Ast::Empty(self.span())),
+            }))
+        }
+    }
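+
+    // Illustrative sketch (editorial, not upstream): per the routine above,
+    // `(?i:abc)` should come back as a non-capturing group carrying the `i`
+    // flag, `(?P<word>\w+)` as a named capture, and `(a)` as capture index 1,
+    // while lookaround such as `(?=a)` is rejected with
+    // `UnsupportedLookAround`.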
+
+    /// Parses a capture group name. Assumes that the parser is positioned at
+    /// the first character in the name following the opening `<` (and may
+    /// possibly be EOF). This advances the parser to the first character
+    /// following the closing `>`.
+    ///
+    /// The caller must provide the capture index of the group for this name.
+    #[inline(never)]
+    fn parse_capture_name(
+        &self,
+        capture_index: u32,
+    ) -> Result<ast::CaptureName> {
+        if self.is_eof() {
+            return Err(self
+                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
+        }
+        let start = self.pos();
+        loop {
+            if self.char() == '>' {
+                break;
+            }
+            if !is_capture_char(self.char(), self.pos() == start) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::GroupNameInvalid,
+                ));
+            }
+            if !self.bump() {
+                break;
+            }
+        }
+        let end = self.pos();
+        if self.is_eof() {
+            return Err(self
+                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
+        }
+        assert_eq!(self.char(), '>');
+        self.bump();
+        let name = &self.pattern()[start.offset..end.offset];
+        if name.is_empty() {
+            return Err(self.error(
+                Span::new(start, start),
+                ast::ErrorKind::GroupNameEmpty,
+            ));
+        }
+        let capname = ast::CaptureName {
+            span: Span::new(start, end),
+            name: name.to_string(),
+            index: capture_index,
+        };
+        self.add_capture_name(&capname)?;
+        Ok(capname)
+    }
+
+    /// Parse a sequence of flags starting at the current character.
+    ///
+    /// This advances the parser to the character immediately following the
+    /// flags, which is guaranteed to be either `:` or `)`.
+    ///
+    /// # Errors
+    ///
+    /// If any flags are duplicated, then an error is returned.
+    ///
+    /// If the negation operator is used more than once, then an error is
+    /// returned.
+    ///
+    /// If no flags could be found or if the negation operation is not followed
+    /// by any flags, then an error is returned.
+    #[inline(never)]
+    fn parse_flags(&self) -> Result<ast::Flags> {
+        let mut flags = ast::Flags { span: self.span(), items: vec![] };
+        let mut last_was_negation = None;
+        while self.char() != ':' && self.char() != ')' {
+            if self.char() == '-' {
+                last_was_negation = Some(self.span_char());
+                let item = ast::FlagsItem {
+                    span: self.span_char(),
+                    kind: ast::FlagsItemKind::Negation,
+                };
+                if let Some(i) = flags.add_item(item) {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::FlagRepeatedNegation {
+                            original: flags.items[i].span,
+                        },
+                    ));
+                }
+            } else {
+                last_was_negation = None;
+                let item = ast::FlagsItem {
+                    span: self.span_char(),
+                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
+                };
+                if let Some(i) = flags.add_item(item) {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::FlagDuplicate {
+                            original: flags.items[i].span,
+                        },
+                    ));
+                }
+            }
+            if !self.bump() {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
+                );
+            }
+        }
+        if let Some(span) = last_was_negation {
+            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
+        }
+        flags.span.end = self.pos();
+        Ok(flags)
+    }
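+
+    // Illustrative sketch (editorial, not upstream): flag sequences such as
+    // `(?im-sU:...)` are accepted, while a duplicated flag like `(?ii)` should
+    // be rejected with `FlagDuplicate` and a trailing `-` as in `(?i-)` with
+    // `FlagDanglingNegation`.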
+
+    /// Parse the current character as a flag. Do not advance the parser.
+    ///
+    /// # Errors
+    ///
+    /// If the flag is not recognized, then an error is returned.
+    #[inline(never)]
+    fn parse_flag(&self) -> Result<ast::Flag> {
+        match self.char() {
+            'i' => Ok(ast::Flag::CaseInsensitive),
+            'm' => Ok(ast::Flag::MultiLine),
+            's' => Ok(ast::Flag::DotMatchesNewLine),
+            'U' => Ok(ast::Flag::SwapGreed),
+            'u' => Ok(ast::Flag::Unicode),
+            'x' => Ok(ast::Flag::IgnoreWhitespace),
+            _ => {
+                Err(self
+                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
+            }
+        }
+    }
+
+    /// Parse a primitive AST, e.g., a literal, a non-set character class or
+    /// an assertion.
+    ///
+    /// This assumes that the parser expects a primitive at the current
+    /// location, i.e., all other non-primitive cases have been handled.
+    /// For example, if the parser's position is at `|`, then `|` will be
+    /// treated as a literal (e.g., inside a character class).
+    ///
+    /// This advances the parser to the first character immediately following
+    /// the primitive.
+    fn parse_primitive(&self) -> Result<Primitive> {
+        match self.char() {
+            '\\' => self.parse_escape(),
+            '.' => {
+                let ast = Primitive::Dot(self.span_char());
+                self.bump();
+                Ok(ast)
+            }
+            '^' => {
+                let ast = Primitive::Assertion(ast::Assertion {
+                    span: self.span_char(),
+                    kind: ast::AssertionKind::StartLine,
+                });
+                self.bump();
+                Ok(ast)
+            }
+            '$' => {
+                let ast = Primitive::Assertion(ast::Assertion {
+                    span: self.span_char(),
+                    kind: ast::AssertionKind::EndLine,
+                });
+                self.bump();
+                Ok(ast)
+            }
+            c => {
+                let ast = Primitive::Literal(ast::Literal {
+                    span: self.span_char(),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: c,
+                });
+                self.bump();
+                Ok(ast)
+            }
+        }
+    }
+
+    /// Parse an escape sequence as a primitive AST.
+    ///
+    /// This assumes the parser is positioned at the start of the escape
+    /// sequence, i.e., `\`. It advances the parser to the first position
+    /// immediately following the escape sequence.
+    #[inline(never)]
+    fn parse_escape(&self) -> Result<Primitive> {
+        assert_eq!(self.char(), '\\');
+        let start = self.pos();
+        if !self.bump() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let c = self.char();
+        // Put some of the more complicated routines into helpers.
+        match c {
+            '0'..='7' => {
+                if !self.parser().octal {
+                    return Err(self.error(
+                        Span::new(start, self.span_char().end),
+                        ast::ErrorKind::UnsupportedBackreference,
+                    ));
+                }
+                let mut lit = self.parse_octal();
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            '8'..='9' if !self.parser().octal => {
+                return Err(self.error(
+                    Span::new(start, self.span_char().end),
+                    ast::ErrorKind::UnsupportedBackreference,
+                ));
+            }
+            'x' | 'u' | 'U' => {
+                let mut lit = self.parse_hex()?;
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            'p' | 'P' => {
+                let mut cls = self.parse_unicode_class()?;
+                cls.span.start = start;
+                return Ok(Primitive::Unicode(cls));
+            }
+            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
+                let mut cls = self.parse_perl_class();
+                cls.span.start = start;
+                return Ok(Primitive::Perl(cls));
+            }
+            _ => {}
+        }
+
+        // Handle all of the one letter sequences inline.
+        self.bump();
+        let span = Span::new(start, self.pos());
+        if is_meta_character(c) {
+            return Ok(Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Punctuation,
+                c: c,
+            }));
+        }
+        let special = |kind, c| {
+            Ok(Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Special(kind),
+                c: c,
+            }))
+        };
+        match c {
+            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
+            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
+            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
+            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
+            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
+            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
+            ' ' if self.ignore_whitespace() => {
+                special(ast::SpecialLiteralKind::Space, ' ')
+            }
+            'A' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::StartText,
+            })),
+            'z' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::EndText,
+            })),
+            'b' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::WordBoundary,
+            })),
+            'B' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::NotWordBoundary,
+            })),
+            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
+        }
+    }
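+
+    // Illustrative sketch (editorial, not upstream): with the dispatch above,
+    // `\n` becomes a `Special(LineFeed)` literal, `\*` a `Punctuation`
+    // literal, `\b` a word-boundary assertion, and an unknown escape such as
+    // `\q` fails with `EscapeUnrecognized`.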
+
+    /// Parse an octal representation of a Unicode codepoint up to 3 digits
+    /// long. This expects the parser to be positioned at the first octal
+    /// digit and advances the parser to the first character immediately
+    /// following the octal number. This also assumes that parsing octal
+    /// escapes is enabled.
+    ///
+    /// Assuming the preconditions are met, this routine can never fail.
+    #[inline(never)]
+    fn parse_octal(&self) -> ast::Literal {
+        use std::char;
+        use std::u32;
+
+        assert!(self.parser().octal);
+        assert!('0' <= self.char() && self.char() <= '7');
+        let start = self.pos();
+        // Parse up to two more digits.
+        while self.bump()
+            && '0' <= self.char()
+            && self.char() <= '7'
+            && self.pos().offset - start.offset <= 2
+        {}
+        let end = self.pos();
+        let octal = &self.pattern()[start.offset..end.offset];
+        // Parsing the octal should never fail since the above guarantees a
+        // valid number.
+        let codepoint =
+            u32::from_str_radix(octal, 8).expect("valid octal number");
+        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
+        // invalid Unicode scalar values.
+        let c = char::from_u32(codepoint).expect("Unicode scalar value");
+        ast::Literal {
+            span: Span::new(start, end),
+            kind: ast::LiteralKind::Octal,
+            c: c,
+        }
+    }
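+
+    // Illustrative sketch (editorial, not upstream): octal escapes are only
+    // honored when enabled via the builder; otherwise `\1` is reported as an
+    // unsupported backreference:
+    //
+    //     let mut p = regex_syntax::ast::parse::ParserBuilder::new()
+    //         .octal(true)
+    //         .build();
+    //     let ast = p.parse(r"\141")?; // expected: the literal 'a' (0o141)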
+
+    /// Parse a hex representation of a Unicode codepoint. This handles both
+    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
+    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
+    /// the first character immediately following the hexadecimal literal.
+    #[inline(never)]
+    fn parse_hex(&self) -> Result<ast::Literal> {
+        assert!(
+            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
+        );
+
+        let hex_kind = match self.char() {
+            'x' => ast::HexLiteralKind::X,
+            'u' => ast::HexLiteralKind::UnicodeShort,
+            _ => ast::HexLiteralKind::UnicodeLong,
+        };
+        if !self.bump_and_bump_space() {
+            return Err(
+                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
+            );
+        }
+        if self.char() == '{' {
+            self.parse_hex_brace(hex_kind)
+        } else {
+            self.parse_hex_digits(hex_kind)
+        }
+    }
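+
+    // Illustrative sketch (editorial, not upstream): both hex forms route
+    // through here, so `\x61` should parse as the fixed-width literal 'a'
+    // and `\x{3B1}` as the braced literal U+03B1 (Greek small alpha).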
+
+    /// Parse an N-digit hex representation of a Unicode codepoint. This
+    /// expects the parser to be positioned at the first digit and will advance
+    /// the parser to the first character immediately following the escape
+    /// sequence.
+    ///
+    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
+    /// or 8 (for `\UNNNNNNNN`).
+    #[inline(never)]
+    fn parse_hex_digits(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let start = self.pos();
+        for i in 0..kind.digits() {
+            if i > 0 && !self.bump_and_bump_space() {
+                return Err(self
+                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
+            }
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        // The final bump just moves the parser past the literal, which may
+        // be EOF.
+        self.bump_and_bump_space();
+        let end = self.pos();
+        let hex = scratch.as_str();
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, end),
+                kind: ast::LiteralKind::HexFixed(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a hex representation of any Unicode scalar value. This expects
+    /// the parser to be positioned at the opening brace `{` and will advance
+    /// the parser to the first character following the closing brace `}`.
+    #[inline(never)]
+    fn parse_hex_brace(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let brace_pos = self.pos();
+        let start = self.span_char().end;
+        while self.bump_and_bump_space() && self.char() != '}' {
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        if self.is_eof() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let end = self.pos();
+        let hex = scratch.as_str();
+        assert_eq!(self.char(), '}');
+        self.bump_and_bump_space();
+
+        if hex.is_empty() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeHexEmpty,
+            ));
+        }
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, self.pos()),
+                kind: ast::LiteralKind::HexBrace(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a decimal number into a u32 while trimming leading and trailing
+    /// whitespace.
+    ///
+    /// This expects the parser to be positioned at the first position where
+    /// a decimal digit could occur. This will advance the parser to the byte
+    /// immediately following the last contiguous decimal digit.
+    ///
+    /// If no decimal digit could be found or if there was a problem parsing
+    /// the complete set of digits into a u32, then an error is returned.
+    fn parse_decimal(&self) -> Result<u32> {
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump();
+        }
+        let start = self.pos();
+        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
+            scratch.push(self.char());
+            self.bump_and_bump_space();
+        }
+        let span = Span::new(start, self.pos());
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump_and_bump_space();
+        }
+        let digits = scratch.as_str();
+        if digits.is_empty() {
+            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
+        }
+        match u32::from_str_radix(digits, 10).ok() {
+            Some(n) => Ok(n),
+            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
+        }
+    }
+
+    /// Parse a standard character class consisting primarily of characters or
+    /// character ranges, but can also contain nested character classes of
+    /// any type (sans `.`).
+    ///
+    /// This assumes the parser is positioned at the opening `[`. If parsing
+    /// is successful, then the parser is advanced to the position immediately
+    /// following the closing `]`.
+    #[inline(never)]
+    fn parse_set_class(&self) -> Result<ast::Class> {
+        assert_eq!(self.char(), '[');
+
+        let mut union =
+            ast::ClassSetUnion { span: self.span(), items: vec![] };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                return Err(self.unclosed_class_error());
+            }
+            match self.char() {
+                '[' => {
+                    // If we've already parsed the opening bracket, then
+                    // attempt to treat this as the beginning of an ASCII
+                    // class. If ASCII class parsing fails, then the parser
+                    // backs up to `[`.
+                    if !self.parser().stack_class.borrow().is_empty() {
+                        if let Some(cls) = self.maybe_parse_ascii_class() {
+                            union.push(ast::ClassSetItem::Ascii(cls));
+                            continue;
+                        }
+                    }
+                    union = self.push_class_open(union)?;
+                }
+                ']' => match self.pop_class(union)? {
+                    Either::Left(nested_union) => {
+                        union = nested_union;
+                    }
+                    Either::Right(class) => return Ok(class),
+                },
+                '&' if self.peek() == Some('&') => {
+                    assert!(self.bump_if("&&"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Intersection,
+                        union,
+                    );
+                }
+                '-' if self.peek() == Some('-') => {
+                    assert!(self.bump_if("--"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Difference,
+                        union,
+                    );
+                }
+                '~' if self.peek() == Some('~') => {
+                    assert!(self.bump_if("~~"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::SymmetricDifference,
+                        union,
+                    );
+                }
+                _ => {
+                    union.push(self.parse_set_class_range()?);
+                }
+            }
+        }
+    }
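+
+    // Illustrative sketch (editorial, not upstream): the loop above is what
+    // gives bracketed classes their set operators, e.g. `[a-z&&[^aeiou]]`
+    // (intersection), `[0-9--4]` (difference) and `[\w~~\d]` (symmetric
+    // difference) should each parse into an `ast::ClassSetBinaryOp`.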
+
+    /// Parse a single primitive item in a character class set. The item to
+    /// be parsed can either be one of a simple literal character, a range
+    /// between two simple literal characters or a "primitive" character
+    /// class like \w or \p{Greek}.
+    ///
+    /// If an invalid escape is found, or if a character class is found where
+    /// a simple literal is expected (e.g., in a range), then an error is
+    /// returned.
+    #[inline(never)]
+    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
+        let prim1 = self.parse_set_class_item()?;
+        self.bump_space();
+        if self.is_eof() {
+            return Err(self.unclosed_class_error());
+        }
+        // If the next char isn't a `-`, then we don't have a range.
+        // There are two exceptions. If the char after a `-` is a `]`, then
+        // `-` is interpreted as a literal `-`. Alternatively, if the char
+        // after a `-` is a `-`, then `--` corresponds to a "difference"
+        // operation.
+        if self.char() != '-'
+            || self.peek_space() == Some(']')
+            || self.peek_space() == Some('-')
+        {
+            return prim1.into_class_set_item(self);
+        }
+        // OK, now we're parsing a range, so bump past the `-` and parse the
+        // second half of the range.
+        if !self.bump_and_bump_space() {
+            return Err(self.unclosed_class_error());
+        }
+        let prim2 = self.parse_set_class_item()?;
+        let range = ast::ClassSetRange {
+            span: Span::new(prim1.span().start, prim2.span().end),
+            start: prim1.into_class_literal(self)?,
+            end: prim2.into_class_literal(self)?,
+        };
+        if !range.is_valid() {
+            return Err(
+                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
+            );
+        }
+        Ok(ast::ClassSetItem::Range(range))
+    }
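+
+    // Illustrative sketch (editorial, not upstream): inside a class, `a-z`
+    // is a range, `a-]` keeps the `-` as a literal (the next char is `]`),
+    // and a reversed range such as `z-a` is rejected with
+    // `ClassRangeInvalid`.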
+
+    /// Parse a single item in a character class as a primitive, where the
+    /// primitive either consists of a verbatim literal or a single escape
+    /// sequence.
+    ///
+    /// This assumes the parser is positioned at the beginning of a primitive,
+    /// and advances the parser to the first position after the primitive if
+    /// successful.
+    ///
+    /// Note that it is the caller's responsibility to report an error if an
+    /// illegal primitive was parsed.
+    #[inline(never)]
+    fn parse_set_class_item(&self) -> Result<Primitive> {
+        if self.char() == '\\' {
+            self.parse_escape()
+        } else {
+            let x = Primitive::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: self.char(),
+            });
+            self.bump();
+            Ok(x)
+        }
+    }
+
+    /// Parses the opening of a character class set. This includes the opening
+    /// bracket along with `^` if present to indicate negation. This also
+    /// starts parsing the opening set of unioned items if applicable, since
+    /// there are special rules applied to certain characters in the opening
+    /// of a character class. For example, `[^]]` is the class of all
+    /// characters not equal to `]`. (`]` would need to be escaped in any other
+    /// position.) Similarly for `-`.
+    ///
+    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
+    /// empty union. This empty union should be replaced with the actual item
+    /// when it is popped from the parser's stack.
+    ///
+    /// This assumes the parser is positioned at the opening `[` and advances
+    /// the parser to the first non-special byte of the character class.
+    ///
+    /// An error is returned if EOF is found.
+    #[inline(never)]
+    fn parse_set_class_open(
+        &self,
+    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
+        assert_eq!(self.char(), '[');
+        let start = self.pos();
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::ClassUnclosed,
+            ));
+        }
+
+        let negated = if self.char() != '^' {
+            false
+        } else {
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+            true
+        };
+        // Accept any number of `-` as literal `-`.
+        let mut union =
+            ast::ClassSetUnion { span: self.span(), items: vec![] };
+        while self.char() == '-' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: '-',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        // If `]` is the *first* char in a set, then interpret it as a literal
+        // `]`. That is, an empty class is impossible to write.
+        if union.items.is_empty() && self.char() == ']' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: ']',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        let set = ast::ClassBracketed {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: ast::ClassSet::union(ast::ClassSetUnion {
+                span: Span::new(union.span.start, union.span.start),
+                items: vec![],
+            }),
+        };
+        Ok((set, union))
+    }
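+
+    // Illustrative sketch (editorial, not upstream): because of the special
+    // handling above, `[]a]` is the class containing `]` and `a`, `[^]]`
+    // matches anything except `]`, and `[-a-]` treats both hyphens as
+    // literals.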
+
+    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
+    ///
+    /// This assumes the parser is positioned at the opening `[`.
+    ///
+    /// If no valid ASCII character class could be found, then this does not
+    /// advance the parser and `None` is returned. Otherwise, the parser is
+    /// advanced to the first byte following the closing `]` and the
+    /// corresponding ASCII class is returned.
+    #[inline(never)]
+    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
+        // ASCII character classes are interesting from a parsing perspective
+        // because parsing cannot fail with any interesting error. For example,
+        // in order to use an ASCII character class, it must be enclosed in
+        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
+        // of it as "ASCII character characters have the syntax `[:NAME:]`
+        // which can only appear within character brackets." This means that
+        // things like `[[:lower:]A]` are legal constructs.
+        //
+        // However, if one types an incorrect ASCII character class, e.g.,
+        // `[[:loower:]]`, then we treat that as a normal nested character
+        // class containing the characters `:elorw`. One might argue that we
+        // should return an error instead since the repeated colons give away
+        // the intent to write an ASCII class. But what if the user typed
+        // `[[:lower]]` instead? How can we tell that was intended to be an
+        // ASCII class and not just a normal nested class?
+        //
+        // Reasonable people can probably disagree over this, but for better
+        // or worse, we implement semantics that never fail at the expense
+        // of better failure modes.
+        assert_eq!(self.char(), '[');
+        // If parsing fails, then we back up the parser to this starting point.
+        let start = self.pos();
+        let mut negated = false;
+        if !self.bump() || self.char() != ':' {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if !self.bump() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if self.char() == '^' {
+            negated = true;
+            if !self.bump() {
+                self.parser().pos.set(start);
+                return None;
+            }
+        }
+        let name_start = self.offset();
+        while self.char() != ':' && self.bump() {}
+        if self.is_eof() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let name = &self.pattern()[name_start..self.offset()];
+        if !self.bump_if(":]") {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let kind = match ast::ClassAsciiKind::from_name(name) {
+            Some(kind) => kind,
+            None => {
+                self.parser().pos.set(start);
+                return None;
+            }
+        };
+        Some(ast::ClassAscii {
+            span: Span::new(start, self.pos()),
+            kind: kind,
+            negated: negated,
+        })
+    }
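+
+    // Illustrative sketch (editorial, not upstream): `[[:alnum:]]` and the
+    // negated `[[:^space:]]` should parse as ASCII classes, while a
+    // misspelling such as `[[:loower:]]` silently falls back to an ordinary
+    // nested class, exactly as described above.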
+
+    /// Parse a Unicode class in either the single character notation, `\pN`
+    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
+    /// the parser is positioned at the `p` (or `P` for negation) and will
+    /// advance the parser to the character immediately following the class.
+    ///
+    /// Note that this does not check whether the class name is valid or not.
+    #[inline(never)]
+    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
+        assert!(self.char() == 'p' || self.char() == 'P');
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let negated = self.char() == 'P';
+        if !self.bump_and_bump_space() {
+            return Err(
+                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
+            );
+        }
+        let (start, kind) = if self.char() == '{' {
+            let start = self.span_char().end;
+            while self.bump_and_bump_space() && self.char() != '}' {
+                scratch.push(self.char());
+            }
+            if self.is_eof() {
+                return Err(self
+                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
+            }
+            assert_eq!(self.char(), '}');
+            self.bump();
+
+            let name = scratch.as_str();
+            if let Some(i) = name.find("!=") {
+                (
+                    start,
+                    ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::NotEqual,
+                        name: name[..i].to_string(),
+                        value: name[i + 2..].to_string(),
+                    },
+                )
+            } else if let Some(i) = name.find(':') {
+                (
+                    start,
+                    ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Colon,
+                        name: name[..i].to_string(),
+                        value: name[i + 1..].to_string(),
+                    },
+                )
+            } else if let Some(i) = name.find('=') {
+                (
+                    start,
+                    ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Equal,
+                        name: name[..i].to_string(),
+                        value: name[i + 1..].to_string(),
+                    },
+                )
+            } else {
+                (start, ast::ClassUnicodeKind::Named(name.to_string()))
+            }
+        } else {
+            let start = self.pos();
+            let c = self.char();
+            if c == '\\' {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::UnicodeClassInvalid,
+                ));
+            }
+            self.bump_and_bump_space();
+            let kind = ast::ClassUnicodeKind::OneLetter(c);
+            (start, kind)
+        };
+        Ok(ast::ClassUnicode {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: kind,
+        })
+    }
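+
+    // Illustrative sketch (editorial, not upstream): both notations funnel
+    // through here, so `\pN` should yield a one-letter class, `\p{Greek}` a
+    // named class, `\p{scx=Katakana}` a name/value pair, and `\P{Greek}` the
+    // negated form.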
+
+    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
+    /// parser is currently at a valid character class name and will be
+    /// advanced to the character immediately following the class.
+    #[inline(never)]
+    fn parse_perl_class(&self) -> ast::ClassPerl {
+        let c = self.char();
+        let span = self.span_char();
+        self.bump();
+        let (negated, kind) = match c {
+            'd' => (false, ast::ClassPerlKind::Digit),
+            'D' => (true, ast::ClassPerlKind::Digit),
+            's' => (false, ast::ClassPerlKind::Space),
+            'S' => (true, ast::ClassPerlKind::Space),
+            'w' => (false, ast::ClassPerlKind::Word),
+            'W' => (true, ast::ClassPerlKind::Word),
+            c => panic!("expected valid Perl class but got '{}'", c),
+        };
+        ast::ClassPerl { span: span, kind: kind, negated: negated }
+    }
+}
+
+/// A type that traverses a fully parsed Ast and checks whether its depth
+/// exceeds the specified nesting limit. If it does, then an error is returned.
+#[derive(Debug)]
+struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
+    /// The parser that is checking the nest limit.
+    p: &'p ParserI<'s, P>,
+    /// The current depth while walking an Ast.
+    depth: u32,
+}
+
+impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
+    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
+        NestLimiter { p: p, depth: 0 }
+    }
+
+    #[inline(never)]
+    fn check(self, ast: &Ast) -> Result<()> {
+        ast::visit(ast, self)
+    }
+
+    fn increment_depth(&mut self, span: &Span) -> Result<()> {
+        let new = self.depth.checked_add(1).ok_or_else(|| {
+            self.p.error(
+                span.clone(),
+                ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
+            )
+        })?;
+        let limit = self.p.parser().nest_limit;
+        if new > limit {
+            return Err(self.p.error(
+                span.clone(),
+                ast::ErrorKind::NestLimitExceeded(limit),
+            ));
+        }
+        self.depth = new;
+        Ok(())
+    }
+
+    fn decrement_depth(&mut self) {
+        // Assuming the correctness of the visitor, this should never drop
+        // below 0.
+        self.depth = self.depth.checked_sub(1).unwrap();
+    }
+}
+
+impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
+    type Output = ();
+    type Err = ast::Error;
+
+    fn finish(self) -> Result<()> {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        let span = match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            Ast::Class(ast::Class::Bracketed(_))
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        let span = match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            ast::ClassSetItem::Bracketed(ref x) => &x.span,
+            ast::ClassSetItem::Union(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.increment_depth(&ast.span)
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.decrement_depth();
+        Ok(())
+    }
+}
+
+/// When the result is an error, transforms the ast::ErrorKind from the source
+/// Result into another one. This function is used to return clearer error
+/// messages when possible.
+fn specialize_err<T>(
+    result: Result<T>,
+    from: ast::ErrorKind,
+    to: ast::ErrorKind,
+) -> Result<T> {
+    if let Err(e) = result {
+        if e.kind == from {
+            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
+        } else {
+            Err(e)
+        }
+    } else {
+        result
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use super::{Parser, ParserBuilder, ParserI, Primitive};
+    use ast::{self, Ast, Position, Span};
+
+    // Our own assert_eq, which has slightly better formatting (but honestly
+    // still kind of crappy).
+    macro_rules! assert_eq {
+        ($left:expr, $right:expr) => {{
+            match (&$left, &$right) {
+                (left_val, right_val) => {
+                    if !(*left_val == *right_val) {
+                        panic!(
+                            "assertion failed: `(left == right)`\n\n\
+                             left:  `{:?}`\nright: `{:?}`\n\n",
+                            left_val, right_val
+                        )
+                    }
+                }
+            }
+        }};
+    }
+
+    // We create these errors to compare with real ast::Errors in the tests.
+    // We define equality between TestError and ast::Error to disregard the
+    // pattern string in ast::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        span: Span,
+        kind: ast::ErrorKind,
+    }
+
+    impl PartialEq<ast::Error> for TestError {
+        fn eq(&self, other: &ast::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    impl PartialEq<TestError> for ast::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    fn s(str: &str) -> String {
+        str.to_string()
+    }
+
+    fn parser(pattern: &str) -> ParserI<Parser> {
+        ParserI::new(Parser::new(), pattern)
+    }
+
+    fn parser_octal(pattern: &str) -> ParserI<Parser> {
+        let parser = ParserBuilder::new().octal(true).build();
+        ParserI::new(parser, pattern)
+    }
+
+    fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
+        let p = ParserBuilder::new().nest_limit(nest_limit).build();
+        ParserI::new(p, pattern)
+    }
+
+    fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
+        let p = ParserBuilder::new().ignore_whitespace(true).build();
+        ParserI::new(p, pattern)
+    }
+
+    /// Short alias for creating a new span.
+    fn nspan(start: Position, end: Position) -> Span {
+        Span::new(start, end)
+    }
+
+    /// Short alias for creating a new position.
+    fn npos(offset: usize, line: usize, column: usize) -> Position {
+        Position::new(offset, line, column)
+    }
+
+    /// Create a new span from the given offset range. This assumes a single
+    /// line and sets the columns based on the offsets, i.e., this only works
+    /// out of the box for ASCII, which is fine for most tests.
+    fn span(range: Range<usize>) -> Span {
+        let start = Position::new(range.start, 1, range.start + 1);
+        let end = Position::new(range.end, 1, range.end + 1);
+        Span::new(start, end)
+    }
+
+    /// Create a new span for the corresponding byte range in the given string.
+    fn span_range(subject: &str, range: Range<usize>) -> Span {
+        let start = Position {
+            offset: range.start,
+            line: 1 + subject[..range.start].matches('\n').count(),
+            column: 1 + subject[..range.start]
+                .chars()
+                .rev()
+                .position(|c| c == '\n')
+                .unwrap_or(subject[..range.start].chars().count()),
+        };
+        let end = Position {
+            offset: range.end,
+            line: 1 + subject[..range.end].matches('\n').count(),
+            column: 1 + subject[..range.end]
+                .chars()
+                .rev()
+                .position(|c| c == '\n')
+                .unwrap_or(subject[..range.end].chars().count()),
+        };
+        Span::new(start, end)
+    }
+
+    /// Create a verbatim literal starting at the given position.
+    fn lit(c: char, start: usize) -> Ast {
+        lit_with(c, span(start..start + c.len_utf8()))
+    }
+
+    /// Create a punctuation literal starting at the given position.
+    fn punct_lit(c: char, span: Span) -> Ast {
+        Ast::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Punctuation,
+            c: c,
+        })
+    }
+
+    /// Create a verbatim literal with the given span.
+    fn lit_with(c: char, span: Span) -> Ast {
+        Ast::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Verbatim,
+            c: c,
+        })
+    }
+
+    /// Create a concatenation with the given range.
+    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        concat_with(span(range), asts)
+    }
+
+    /// Create a concatenation with the given span.
+    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
+        Ast::Concat(ast::Concat { span: span, asts: asts })
+    }
+
+    /// Create an alternation with the given span.
+    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
+    }
+
+    /// Create a capturing group with the given span.
+    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
+        Ast::Group(ast::Group {
+            span: span(range),
+            kind: ast::GroupKind::CaptureIndex(index),
+            ast: Box::new(ast),
+        })
+    }
+
+    /// Create an ast::SetFlags.
+    ///
+    /// The given pattern should be the full pattern string. The range given
+    /// should correspond to the byte offsets where the flag set occurs.
+    ///
+    /// If negated is true, then the set is interpreted as beginning with a
+    /// negation.
+    fn flag_set(
+        pat: &str,
+        range: Range<usize>,
+        flag: ast::Flag,
+        negated: bool,
+    ) -> Ast {
+        let mut items = vec![ast::FlagsItem {
+            span: span_range(pat, (range.end - 2)..(range.end - 1)),
+            kind: ast::FlagsItemKind::Flag(flag),
+        }];
+        if negated {
+            items.insert(
+                0,
+                ast::FlagsItem {
+                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
+                    kind: ast::FlagsItemKind::Negation,
+                },
+            );
+        }
+        Ast::Flags(ast::SetFlags {
+            span: span_range(pat, range.clone()),
+            flags: ast::Flags {
+                span: span_range(pat, (range.start + 2)..(range.end - 1)),
+                items: items,
+            },
+        })
+    }
+
+    #[test]
+    fn parse_nest_limit() {
+        // A nest limit of 0 still allows some types of regexes.
+        assert_eq!(
+            parser_nest_limit("", 0).parse(),
+            Ok(Ast::Empty(span(0..0)))
+        );
+        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
+
+        // Test repetition operations, which require one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a+", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("a+", 1).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("a+*", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("a+*", 2).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(2..3),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(Ast::Repetition(ast::Repetition {
+                    span: span(0..2),
+                    op: ast::RepetitionOp {
+                        span: span(1..2),
+                        kind: ast::RepetitionKind::OneOrMore,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 0)),
+                })),
+            }))
+        );
+
+        // Test concatenations. A concatenation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("ab", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("ab", 1).parse(),
+            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
+        );
+        assert_eq!(
+            parser_nest_limit("abc", 1).parse(),
+            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
+        );
+
+        // Test alternations. An alternation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a|b", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("a|b", 1).parse(),
+            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
+        );
+        assert_eq!(
+            parser_nest_limit("a|b|c", 1).parse(),
+            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
+        );
+
+        // Test character classes. Classes form their own mini-recursive
+        // syntax!
+        assert_eq!(
+            parser_nest_limit("[a]", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("[a]", 1).parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
+                    ast::Literal {
+                        span: span(1..2),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'a',
+                    }
+                )),
+            })))
+        );
+        assert_eq!(
+            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(3..7),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(3),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..5),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            }
+        );
+        assert_eq!(
+            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            }
+        );
+    }
+
+    #[test]
+    fn parse_comments() {
+        let pat = "(?x)
+# This is comment 1.
+foo # This is comment 2.
+  # This is comment 3.
+bar
+# This is comment 4.";
+        let astc = parser(pat).parse_with_comments().unwrap();
+        assert_eq!(
+            astc.ast,
+            concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    lit_with('f', span_range(pat, 26..27)),
+                    lit_with('o', span_range(pat, 27..28)),
+                    lit_with('o', span_range(pat, 28..29)),
+                    lit_with('b', span_range(pat, 74..75)),
+                    lit_with('a', span_range(pat, 75..76)),
+                    lit_with('r', span_range(pat, 76..77)),
+                ]
+            )
+        );
+        assert_eq!(
+            astc.comments,
+            vec![
+                ast::Comment {
+                    span: span_range(pat, 5..26),
+                    comment: s(" This is comment 1."),
+                },
+                ast::Comment {
+                    span: span_range(pat, 30..51),
+                    comment: s(" This is comment 2."),
+                },
+                ast::Comment {
+                    span: span_range(pat, 53..74),
+                    comment: s(" This is comment 3."),
+                },
+                ast::Comment {
+                    span: span_range(pat, 78..98),
+                    comment: s(" This is comment 4."),
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn parse_holistic() {
+        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
+        assert_eq!(
+            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
+            Ok(concat(
+                0..36,
+                vec![
+                    punct_lit('\\', span(0..2)),
+                    punct_lit('.', span(2..4)),
+                    punct_lit('+', span(4..6)),
+                    punct_lit('*', span(6..8)),
+                    punct_lit('?', span(8..10)),
+                    punct_lit('(', span(10..12)),
+                    punct_lit(')', span(12..14)),
+                    punct_lit('|', span(14..16)),
+                    punct_lit('[', span(16..18)),
+                    punct_lit(']', span(18..20)),
+                    punct_lit('{', span(20..22)),
+                    punct_lit('}', span(22..24)),
+                    punct_lit('^', span(24..26)),
+                    punct_lit('$', span(26..28)),
+                    punct_lit('#', span(28..30)),
+                    punct_lit('&', span(30..32)),
+                    punct_lit('-', span(32..34)),
+                    punct_lit('~', span(34..36)),
+                ]
+            ))
+        );
+    }
+
+    #[test]
+    fn parse_ignore_whitespace() {
+        // Test that basic whitespace insensitivity works.
+        let pat = "(?x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                nspan(npos(0, 1, 1), npos(7, 1, 8)),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+                ]
+            ))
+        );
+
+        // Test that we can toggle whitespace insensitivity.
+        let pat = "(?x)a b(?-x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                nspan(npos(0, 1, 1), npos(15, 1, 16)),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
+                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
+                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
+                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
+                ]
+            ))
+        );
+
+        // Test that nesting whitespace insensitive flags works.
+        let pat = "a (?x:a )a ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..11),
+                vec![
+                    lit_with('a', span_range(pat, 0..1)),
+                    lit_with(' ', span_range(pat, 1..2)),
+                    Ast::Group(ast::Group {
+                        span: span_range(pat, 2..9),
+                        kind: ast::GroupKind::NonCapturing(ast::Flags {
+                            span: span_range(pat, 4..5),
+                            items: vec![ast::FlagsItem {
+                                span: span_range(pat, 4..5),
+                                kind: ast::FlagsItemKind::Flag(
+                                    ast::Flag::IgnoreWhitespace
+                                ),
+                            },],
+                        }),
+                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
+                    }),
+                    lit_with('a', span_range(pat, 9..10)),
+                    lit_with(' ', span_range(pat, 10..11)),
+                ]
+            ))
+        );
+
+        // Test that whitespace after an opening paren is insignificant.
+        let pat = "(?x)( ?P<foo> a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    Ast::Group(ast::Group {
+                        span: span_range(pat, 4..pat.len()),
+                        kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                            span: span_range(pat, 9..12),
+                            name: s("foo"),
+                            index: 1,
+                        }),
+                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
+                    }),
+                ]
+            ))
+        );
+        let pat = "(?x)(  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    Ast::Group(ast::Group {
+                        span: span_range(pat, 4..pat.len()),
+                        kind: ast::GroupKind::CaptureIndex(1),
+                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
+                    }),
+                ]
+            ))
+        );
+        let pat = "(?x)(  ?:  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    Ast::Group(ast::Group {
+                        span: span_range(pat, 4..pat.len()),
+                        kind: ast::GroupKind::NonCapturing(ast::Flags {
+                            span: span_range(pat, 8..8),
+                            items: vec![],
+                        }),
+                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
+                    }),
+                ]
+            ))
+        );
+        let pat = r"(?x)\x { 53 }";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    Ast::Literal(ast::Literal {
+                        span: span(4..13),
+                        kind: ast::LiteralKind::HexBrace(
+                            ast::HexLiteralKind::X
+                        ),
+                        c: 'S',
+                    }),
+                ]
+            ))
+        );
+
+        // Test that whitespace after an escape is OK.
+        let pat = r"(?x)\ ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                    Ast::Literal(ast::Literal {
+                        span: span_range(pat, 4..6),
+                        kind: ast::LiteralKind::Special(
+                            ast::SpecialLiteralKind::Space
+                        ),
+                        c: ' ',
+                    }),
+                ]
+            ))
+        );
+        // ... but only when `x` mode is enabled.
+        let pat = r"\ ";
+        assert_eq!(
+            parser(pat).parse().unwrap_err(),
+            TestError {
+                span: span_range(pat, 0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_newlines() {
+        let pat = ".\n.";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..3),
+                vec![
+                    Ast::Dot(span_range(pat, 0..1)),
+                    lit_with('\n', span_range(pat, 1..2)),
+                    Ast::Dot(span_range(pat, 2..3)),
+                ]
+            ))
+        );
+
+        let pat = "foobar\nbaz\nquux\n";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(
+                span_range(pat, 0..pat.len()),
+                vec![
+                    lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
+                    lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
+                    lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
+                    lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
+                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                    lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
+                    lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
+                    lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
+                    lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
+                    lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
+                    lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
+                    lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
+                    lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
+                    lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
+                    lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
+                    lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
+                ]
+            ))
+        );
+    }
+
+    #[test]
+    fn parse_uncounted_repetition() {
+        assert_eq!(
+            parser(r"a*").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a+").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a??").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(1..3),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a?b").parse(),
+            Ok(concat(
+                0..3,
+                vec![
+                    Ast::Repetition(ast::Repetition {
+                        span: span(0..2),
+                        op: ast::RepetitionOp {
+                            span: span(1..2),
+                            kind: ast::RepetitionKind::ZeroOrOne,
+                        },
+                        greedy: true,
+                        ast: Box::new(lit('a', 0)),
+                    }),
+                    lit('b', 2),
+                ]
+            ))
+        );
+        assert_eq!(
+            parser(r"a??b").parse(),
+            Ok(concat(
+                0..4,
+                vec![
+                    Ast::Repetition(ast::Repetition {
+                        span: span(0..3),
+                        op: ast::RepetitionOp {
+                            span: span(1..3),
+                            kind: ast::RepetitionKind::ZeroOrOne,
+                        },
+                        greedy: false,
+                        ast: Box::new(lit('a', 0)),
+                    }),
+                    lit('b', 3),
+                ]
+            ))
+        );
+        assert_eq!(
+            parser(r"ab?").parse(),
+            Ok(concat(
+                0..3,
+                vec![
+                    lit('a', 0),
+                    Ast::Repetition(ast::Repetition {
+                        span: span(1..3),
+                        op: ast::RepetitionOp {
+                            span: span(2..3),
+                            kind: ast::RepetitionKind::ZeroOrOne,
+                        },
+                        greedy: true,
+                        ast: Box::new(lit('b', 1)),
+                    }),
+                ]
+            ))
+        );
+        assert_eq!(
+            parser(r"(ab)?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(4..5),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(group(
+                    0..4,
+                    1,
+                    concat(1..3, vec![lit('a', 1), lit('b', 2),])
+                )),
+            }))
+        );
+        assert_eq!(
+            parser(r"|a?").parse(),
+            Ok(alt(
+                0..3,
+                vec![
+                    Ast::Empty(span(0..0)),
+                    Ast::Repetition(ast::Repetition {
+                        span: span(1..3),
+                        op: ast::RepetitionOp {
+                            span: span(2..3),
+                            kind: ast::RepetitionKind::ZeroOrOne,
+                        },
+                        greedy: true,
+                        ast: Box::new(lit('a', 1)),
+                    }),
+                ]
+            ))
+        );
+
+        assert_eq!(
+            parser(r"*").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"(?i)*").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"(*)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"(?:?)").parse().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"+").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"?").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"(?)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"|*").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"|+").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"|?").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_counted_repetition() {
+        assert_eq!(
+            parser(r"a{5}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..4),
+                op: ast::RepetitionOp {
+                    span: span(1..4),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)
+                    ),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a{5,}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::AtLeast(5)
+                    ),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a{5,9}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)
+                    ),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a{5}?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)
+                    ),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"ab{5}").parse(),
+            Ok(concat(
+                0..5,
+                vec![
+                    lit('a', 0),
+                    Ast::Repetition(ast::Repetition {
+                        span: span(1..5),
+                        op: ast::RepetitionOp {
+                            span: span(2..5),
+                            kind: ast::RepetitionKind::Range(
+                                ast::RepetitionRange::Exactly(5)
+                            ),
+                        },
+                        greedy: true,
+                        ast: Box::new(lit('b', 1)),
+                    }),
+                ]
+            ))
+        );
+        assert_eq!(
+            parser(r"ab{5}c").parse(),
+            Ok(concat(
+                0..6,
+                vec![
+                    lit('a', 0),
+                    Ast::Repetition(ast::Repetition {
+                        span: span(1..5),
+                        op: ast::RepetitionOp {
+                            span: span(2..5),
+                            kind: ast::RepetitionKind::Range(
+                                ast::RepetitionRange::Exactly(5)
+                            ),
+                        },
+                        greedy: true,
+                        ast: Box::new(lit('b', 1)),
+                    }),
+                    lit('c', 5),
+                ]
+            ))
+        );
+
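+        // Whitespace inside the braces of a counted repetition is accepted
+        // even without the `x` flag.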
+        assert_eq!(
+            parser(r"a{ 5 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)
+                    ),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser(r"a{ 5 , 9 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..10),
+                op: ast::RepetitionOp {
+                    span: span(1..10),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)
+                    ),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+        assert_eq!(
+            parser_ignore_whitespace(r"a{5,9} ?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..8),
+                op: ast::RepetitionOp {
+                    span: span(1..8),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)
+                    ),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            }))
+        );
+
+        assert_eq!(
+            parser(r"(?i){0}").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"(?m){1,1}").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"a{]}").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"a{1,]}").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"a{").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(r"a{}").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"a{a").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(2..12),
+                kind: ast::ErrorKind::DecimalInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9,a").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9,9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(4..14),
+                kind: ast::ErrorKind::DecimalInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9,").parse().unwrap_err(),
+            TestError {
+                span: span(1..4),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(r"a{9,11").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(r"a{2,1}").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"{5}").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+        assert_eq!(
+            parser(r"|{5}").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_alternate() {
+        assert_eq!(
+            parser(r"a|b").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..3),
+                asts: vec![lit('a', 0), lit('b', 2)],
+            }))
+        );
+        assert_eq!(
+            parser(r"(a|b)").parse(),
+            Ok(group(
+                0..5,
+                1,
+                Ast::Alternation(ast::Alternation {
+                    span: span(1..4),
+                    asts: vec![lit('a', 1), lit('b', 3)],
+                })
+            ))
+        );
+
+        assert_eq!(
+            parser(r"a|b|c").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..5),
+                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
+            }))
+        );
+        assert_eq!(
+            parser(r"ax|by|cz").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..8),
+                asts: vec![
+                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
+                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
+                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
+                ],
+            }))
+        );
+        assert_eq!(
+            parser(r"(ax|by|cz)").parse(),
+            Ok(group(
+                0..10,
+                1,
+                Ast::Alternation(ast::Alternation {
+                    span: span(1..9),
+                    asts: vec![
+                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                        concat(4..6, vec![lit('b', 4), lit('y', 5)]),
+                        concat(7..9, vec![lit('c', 7), lit('z', 8)]),
+                    ],
+                })
+            ))
+        );
+        assert_eq!(
+            parser(r"(ax|(by|(cz)))").parse(),
+            Ok(group(
+                0..14,
+                1,
+                alt(
+                    1..13,
+                    vec![
+                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                        group(
+                            4..13,
+                            2,
+                            alt(
+                                5..12,
+                                vec![
+                                    concat(
+                                        5..7,
+                                        vec![lit('b', 5), lit('y', 6)]
+                                    ),
+                                    group(
+                                        8..12,
+                                        3,
+                                        concat(
+                                            9..11,
+                                            vec![lit('c', 9), lit('z', 10),]
+                                        )
+                                    ),
+                                ]
+                            )
+                        ),
+                    ]
+                )
+            ))
+        );
+
+        assert_eq!(
+            parser(r"|").parse(),
+            Ok(alt(
+                0..1,
+                vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
+            ))
+        );
+        assert_eq!(
+            parser(r"||").parse(),
+            Ok(alt(
+                0..2,
+                vec![
+                    Ast::Empty(span(0..0)),
+                    Ast::Empty(span(1..1)),
+                    Ast::Empty(span(2..2)),
+                ]
+            ))
+        );
+        assert_eq!(
+            parser(r"a|").parse(),
+            Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
+        );
+        assert_eq!(
+            parser(r"|a").parse(),
+            Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
+        );
+
+        assert_eq!(
+            parser(r"(|)").parse(),
+            Ok(group(
+                0..3,
+                1,
+                alt(
+                    1..2,
+                    vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
+                )
+            ))
+        );
+        assert_eq!(
+            parser(r"(a|)").parse(),
+            Ok(group(
+                0..4,
+                1,
+                alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
+            ))
+        );
+        assert_eq!(
+            parser(r"(|a)").parse(),
+            Ok(group(
+                0..4,
+                1,
+                alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
+            ))
+        );
+
+        assert_eq!(
+            parser(r"a|b)").parse().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::GroupUnopened,
+            }
+        );
+        assert_eq!(
+            parser(r"(a|b").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_unsupported_lookaround() {
+        assert_eq!(
+            parser(r"(?=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            }
+        );
+        assert_eq!(
+            parser(r"(?!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            }
+        );
+        assert_eq!(
+            parser(r"(?<=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            }
+        );
+        assert_eq!(
+            parser(r"(?<!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_group() {
+        assert_eq!(
+            parser("(?i)").parse(),
+            Ok(Ast::Flags(ast::SetFlags {
+                span: span(0..4),
+                flags: ast::Flags {
+                    span: span(2..3),
+                    items: vec![ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive
+                        ),
+                    }],
+                },
+            }))
+        );
+        assert_eq!(
+            parser("(?iU)").parse(),
+            Ok(Ast::Flags(ast::SetFlags {
+                span: span(0..5),
+                flags: ast::Flags {
+                    span: span(2..4),
+                    items: vec![
+                        ast::FlagsItem {
+                            span: span(2..3),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::CaseInsensitive
+                            ),
+                        },
+                        ast::FlagsItem {
+                            span: span(3..4),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::SwapGreed
+                            ),
+                        },
+                    ],
+                },
+            }))
+        );
+        assert_eq!(
+            parser("(?i-U)").parse(),
+            Ok(Ast::Flags(ast::SetFlags {
+                span: span(0..6),
+                flags: ast::Flags {
+                    span: span(2..5),
+                    items: vec![
+                        ast::FlagsItem {
+                            span: span(2..3),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::CaseInsensitive
+                            ),
+                        },
+                        ast::FlagsItem {
+                            span: span(3..4),
+                            kind: ast::FlagsItemKind::Negation,
+                        },
+                        ast::FlagsItem {
+                            span: span(4..5),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::SwapGreed
+                            ),
+                        },
+                    ],
+                },
+            }))
+        );
+
+        assert_eq!(
+            parser("()").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..2),
+                kind: ast::GroupKind::CaptureIndex(1),
+                ast: Box::new(Ast::Empty(span(1..1))),
+            }))
+        );
+        assert_eq!(
+            parser("(a)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..3),
+                kind: ast::GroupKind::CaptureIndex(1),
+                ast: Box::new(lit('a', 1)),
+            }))
+        );
+        assert_eq!(
+            parser("(())").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..4),
+                kind: ast::GroupKind::CaptureIndex(1),
+                ast: Box::new(Ast::Group(ast::Group {
+                    span: span(1..3),
+                    kind: ast::GroupKind::CaptureIndex(2),
+                    ast: Box::new(Ast::Empty(span(2..2))),
+                })),
+            }))
+        );
+
+        assert_eq!(
+            parser("(?:a)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..5),
+                kind: ast::GroupKind::NonCapturing(ast::Flags {
+                    span: span(2..2),
+                    items: vec![],
+                }),
+                ast: Box::new(lit('a', 3)),
+            }))
+        );
+
+        assert_eq!(
+            parser("(?i:a)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..6),
+                kind: ast::GroupKind::NonCapturing(ast::Flags {
+                    span: span(2..3),
+                    items: vec![ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive
+                        ),
+                    },],
+                }),
+                ast: Box::new(lit('a', 4)),
+            }))
+        );
+        assert_eq!(
+            parser("(?i-U:a)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..8),
+                kind: ast::GroupKind::NonCapturing(ast::Flags {
+                    span: span(2..5),
+                    items: vec![
+                        ast::FlagsItem {
+                            span: span(2..3),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::CaseInsensitive
+                            ),
+                        },
+                        ast::FlagsItem {
+                            span: span(3..4),
+                            kind: ast::FlagsItemKind::Negation,
+                        },
+                        ast::FlagsItem {
+                            span: span(4..5),
+                            kind: ast::FlagsItemKind::Flag(
+                                ast::Flag::SwapGreed
+                            ),
+                        },
+                    ],
+                }),
+                ast: Box::new(lit('a', 6)),
+            }))
+        );
+
+        assert_eq!(
+            parser("(").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("(?").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("(?P").parse().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            }
+        );
+        assert_eq!(
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser("(a").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("(()").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(")").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnopened,
+            }
+        );
+        assert_eq!(
+            parser("a)").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::GroupUnopened,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_capture_name() {
+        assert_eq!(
+            parser("(?P<a>z)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..8),
+                kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                    span: span(4..5),
+                    name: s("a"),
+                    index: 1,
+                }),
+                ast: Box::new(lit('z', 6)),
+            }))
+        );
+        assert_eq!(
+            parser("(?P<abc>z)").parse(),
+            Ok(Ast::Group(ast::Group {
+                span: span(0..10),
+                kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                    span: span(4..7),
+                    name: s("abc"),
+                    index: 1,
+                }),
+                ast: Box::new(lit('z', 8)),
+            }))
+        );
+
+        assert_eq!(
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser("(?P<>z)").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameEmpty,
+            }
+        );
+        assert_eq!(
+            parser("(?P<a").parse().unwrap_err(),
+            TestError {
+                span: span(5..5),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser("(?P<ab").parse().unwrap_err(),
+            TestError {
+                span: span(6..6),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser("(?P<0a").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            }
+        );
+        assert_eq!(
+            parser("(?P<~").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            }
+        );
+        assert_eq!(
+            parser("(?P<abc~").parse().unwrap_err(),
+            TestError {
+                span: span(7..8),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            }
+        );
+        assert_eq!(
+            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
+            TestError {
+                span: span(12..13),
+                kind: ast::ErrorKind::GroupNameDuplicate {
+                    original: span(4..5),
+                },
+            }
+        );
+    }
+
+    #[test]
+    fn parse_flags() {
+        assert_eq!(
+            parser("i:").parse_flags(),
+            Ok(ast::Flags {
+                span: span(0..1),
+                items: vec![ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                }],
+            })
+        );
+        assert_eq!(
+            parser("i)").parse_flags(),
+            Ok(ast::Flags {
+                span: span(0..1),
+                items: vec![ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                }],
+            })
+        );
+
+        assert_eq!(
+            parser("isU:").parse_flags(),
+            Ok(ast::Flags {
+                span: span(0..3),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(0..1),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(1..2),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::DotMatchesNewLine
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            })
+        );
+
+        assert_eq!(
+            parser("-isU:").parse_flags(),
+            Ok(ast::Flags {
+                span: span(0..4),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(0..1),
+                        kind: ast::FlagsItemKind::Negation,
+                    },
+                    ast::FlagsItem {
+                        span: span(1..2),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::DotMatchesNewLine
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            })
+        );
+        assert_eq!(
+            parser("i-sU:").parse_flags(),
+            Ok(ast::Flags {
+                span: span(0..4),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(0..1),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(1..2),
+                        kind: ast::FlagsItemKind::Negation,
+                    },
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::DotMatchesNewLine
+                        ),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            })
+        );
+
+        assert_eq!(
+            parser("isU").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::FlagUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser("isUa:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            }
+        );
+        assert_eq!(
+            parser("isUi:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
+            }
+        );
+        assert_eq!(
+            parser("i-sU-i:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::FlagRepeatedNegation {
+                    original: span(1..2),
+                },
+            }
+        );
+        assert_eq!(
+            parser("-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            }
+        );
+        assert_eq!(
+            parser("i-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            }
+        );
+        assert_eq!(
+            parser("iU-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_flag() {
+        assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
+        assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
+        assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
+        assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
+        assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
+        assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
+
+        assert_eq!(
+            parser("a").parse_flag().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            }
+        );
+        assert_eq!(
+            parser("☃").parse_flag().unwrap_err(),
+            TestError {
+                span: span_range("☃", 0..3),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_primitive_non_escape() {
+        assert_eq!(
+            parser(r".").parse_primitive(),
+            Ok(Primitive::Dot(span(0..1)))
+        );
+        assert_eq!(
+            parser(r"^").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::StartLine,
+            }))
+        );
+        assert_eq!(
+            parser(r"$").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::EndLine,
+            }))
+        );
+
+        assert_eq!(
+            parser(r"a").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: 'a',
+            }))
+        );
+        assert_eq!(
+            parser(r"|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: '|',
+            }))
+        );
+        assert_eq!(
+            parser(r"☃").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span_range("☃", 0..3),
+                kind: ast::LiteralKind::Verbatim,
+                c: '☃',
+            }))
+        );
+    }
+
+    #[test]
+    fn parse_escape() {
+        assert_eq!(
+            parser(r"\|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..2),
+                kind: ast::LiteralKind::Punctuation,
+                c: '|',
+            }))
+        );
+        let specials = &[
+            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
+            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
+            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
+            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
+            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
+            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
+        ];
+        for &(pat, c, ref kind) in specials {
+            assert_eq!(
+                parser(pat).parse_primitive(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..2),
+                    kind: ast::LiteralKind::Special(kind.clone()),
+                    c: c,
+                }))
+            );
+        }
+        assert_eq!(
+            parser(r"\A").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::StartText,
+            }))
+        );
+        assert_eq!(
+            parser(r"\z").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::EndText,
+            }))
+        );
+        assert_eq!(
+            parser(r"\b").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::WordBoundary,
+            }))
+        );
+        assert_eq!(
+            parser(r"\B").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::NotWordBoundary,
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\y").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_unsupported_backreference() {
+        assert_eq!(
+            parser(r"\0").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            }
+        );
+        assert_eq!(
+            parser(r"\9").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_octal() {
+        for i in 0..511 {
+            let pat = format!(r"\{:o}", i);
+            assert_eq!(
+                parser_octal(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::Octal,
+                    c: ::std::char::from_u32(i).unwrap(),
+                }))
+            );
+        }
+        assert_eq!(
+            parser_octal(r"\778").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..3),
+                kind: ast::LiteralKind::Octal,
+                c: '?',
+            }))
+        );
+        assert_eq!(
+            parser_octal(r"\7777").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..4),
+                kind: ast::LiteralKind::Octal,
+                c: '\u{01FF}',
+            }))
+        );
+        assert_eq!(
+            parser_octal(r"\778").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..3),
+                        kind: ast::LiteralKind::Octal,
+                        c: '?',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '8',
+                    }),
+                ],
+            }))
+        );
+        assert_eq!(
+            parser_octal(r"\7777").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..5),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..4),
+                        kind: ast::LiteralKind::Octal,
+                        c: '\u{01FF}',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(4..5),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '7',
+                    }),
+                ],
+            }))
+        );
+
+        assert_eq!(
+            parser_octal(r"\8").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_hex_two() {
+        for i in 0..256 {
+            let pat = format!(r"\x{:02x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
+                    c: ::std::char::from_u32(i).unwrap(),
+                }))
+            );
+        }
+
+        assert_eq!(
+            parser(r"\xF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\xG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\xFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_hex_four() {
+        for i in 0..65536 {
+            let c = match ::std::char::from_u32(i) {
+                None => continue,
+                Some(c) => c,
+            };
+            let pat = format!(r"\u{:04x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeShort
+                    ),
+                    c: c,
+                }))
+            );
+        }
+
+        assert_eq!(
+            parser(r"\uF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\uG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\uFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\uFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\uFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(5..6),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\uD800").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..6),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_hex_eight() {
+        for i in 0..65536 {
+            let c = match ::std::char::from_u32(i) {
+                None => continue,
+                Some(c) => c,
+            };
+            let pat = format!(r"\U{:08x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeLong
+                    ),
+                    c: c,
+                }))
+            );
+        }
+
+        assert_eq!(
+            parser(r"\UF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\UG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(5..6),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(6..7),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(7..8),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(8..9),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(9..10),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_hex_brace() {
+        assert_eq!(
+            parser(r"\u{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(
+                    ast::HexLiteralKind::UnicodeShort
+                ),
+                c: '⛄',
+            }))
+        );
+        assert_eq!(
+            parser(r"\U{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(
+                    ast::HexLiteralKind::UnicodeLong
+                ),
+                c: '⛄',
+            }))
+        );
+        assert_eq!(
+            parser(r"\x{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '⛄',
+            }))
+        );
+        assert_eq!(
+            parser(r"\x{26C4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '⛄',
+            }))
+        );
+        assert_eq!(
+            parser(r"\x{10fFfF}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..10),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '\u{10FFFF}',
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\x").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{FF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..5),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..4),
+                kind: ast::ErrorKind::EscapeHexEmpty,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{FGF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..9),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{D800}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..7),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..12),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_decimal() {
+        assert_eq!(parser("123").parse_decimal(), Ok(123));
+        assert_eq!(parser("0").parse_decimal(), Ok(0));
+        assert_eq!(parser("01").parse_decimal(), Ok(1));
+
+        assert_eq!(
+            parser("-1").parse_decimal().unwrap_err(),
+            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
+        );
+        assert_eq!(
+            parser("").parse_decimal().unwrap_err(),
+            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
+        );
+        assert_eq!(
+            parser("9999999999").parse_decimal().unwrap_err(),
+            TestError {
+                span: span(0..10),
+                kind: ast::ErrorKind::DecimalInvalid,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_set_class() {
+        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
+            ast::ClassSet::union(ast::ClassSetUnion {
+                span: span,
+                items: items,
+            })
+        }
+
+        fn intersection(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::Intersection,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn difference(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::Difference,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn symdifference(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
+            ast::ClassSet::Item(item)
+        }
+
+        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
+            ast::ClassSetItem::Ascii(cls)
+        }
+
+        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
+            ast::ClassSetItem::Unicode(cls)
+        }
+
+        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
+            ast::ClassSetItem::Perl(cls)
+        }
+
+        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
+            ast::ClassSetItem::Bracketed(Box::new(cls))
+        }
+
+        fn lit(span: Span, c: char) -> ast::ClassSetItem {
+            ast::ClassSetItem::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Verbatim,
+                c: c,
+            })
+        }
+
+        fn empty(span: Span) -> ast::ClassSetItem {
+            ast::ClassSetItem::Empty(span)
+        }
+
+        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
+            let pos1 = Position {
+                offset: span.start.offset + start.len_utf8(),
+                column: span.start.column + 1,
+                ..span.start
+            };
+            let pos2 = Position {
+                offset: span.end.offset - end.len_utf8(),
+                column: span.end.column - 1,
+                ..span.end
+            };
+            ast::ClassSetItem::Range(ast::ClassSetRange {
+                span: span,
+                start: ast::Literal {
+                    span: Span { end: pos1, ..span },
+                    kind: ast::LiteralKind::Verbatim,
+                    c: start,
+                },
+                end: ast::Literal {
+                    span: Span { start: pos2, ..span },
+                    kind: ast::LiteralKind::Verbatim,
+                    c: end,
+                },
+            })
+        }
+
+        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
+            ast::ClassAscii {
+                span: span,
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: negated,
+            }
+        }
+
+        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
+            ast::ClassAscii {
+                span: span,
+                kind: ast::ClassAsciiKind::Lower,
+                negated: negated,
+            }
+        }
+
+        assert_eq!(
+            parser("[[:alnum:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..11),
+                negated: false,
+                kind: itemset(item_ascii(alnum(span(1..10), false))),
+            })))
+        );
+        assert_eq!(
+            parser("[[[:alnum:]]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..13),
+                negated: false,
+                kind: itemset(item_bracket(ast::ClassBracketed {
+                    span: span(1..12),
+                    negated: false,
+                    kind: itemset(item_ascii(alnum(span(2..11), false))),
+                })),
+            })))
+        );
+        assert_eq!(
+            parser("[[:alnum:]&&[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: intersection(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser("[[:alnum:]--[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: difference(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser("[[:alnum:]~~[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: symdifference(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            })))
+        );
+
+        assert_eq!(
+            parser("[a]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: itemset(lit(span(1..2), 'a')),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a\]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: union(
+                    span(1..4),
+                    vec![
+                        lit(span(1..2), 'a'),
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..4),
+                            kind: ast::LiteralKind::Punctuation,
+                            c: ']',
+                        }),
+                    ]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a\-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: union(
+                    span(1..5),
+                    vec![
+                        lit(span(1..2), 'a'),
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..4),
+                            kind: ast::LiteralKind::Punctuation,
+                            c: '-',
+                        }),
+                        lit(span(4..5), 'z'),
+                    ]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser("[ab]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(
+                    span(1..3),
+                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser("[a-]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(
+                    span(1..3),
+                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser("[-a]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(
+                    span(1..3),
+                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\pL]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: itemset(item_unicode(ast::ClassUnicode {
+                    span: span(1..4),
+                    negated: false,
+                    kind: ast::ClassUnicodeKind::OneLetter('L'),
+                })),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\w]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: itemset(item_perl(ast::ClassPerl {
+                    span: span(1..3),
+                    kind: ast::ClassPerlKind::Word,
+                    negated: false,
+                })),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a\wz]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: union(
+                    span(1..5),
+                    vec![
+                        lit(span(1..2), 'a'),
+                        item_perl(ast::ClassPerl {
+                            span: span(2..4),
+                            kind: ast::ClassPerlKind::Word,
+                            negated: false,
+                        }),
+                        lit(span(4..5), 'z'),
+                    ]
+                ),
+            })))
+        );
+
+        assert_eq!(
+            parser("[a-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: itemset(range(span(1..4), 'a', 'z')),
+            })))
+        );
+        assert_eq!(
+            parser("[a-cx-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..8),
+                negated: false,
+                kind: union(
+                    span(1..7),
+                    vec![
+                        range(span(1..4), 'a', 'c'),
+                        range(span(4..7), 'x', 'z'),
+                    ]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\w&&a-cx-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..12),
+                negated: false,
+                kind: intersection(
+                    span(1..11),
+                    itemset(item_perl(ast::ClassPerl {
+                        span: span(1..3),
+                        kind: ast::ClassPerlKind::Word,
+                        negated: false,
+                    })),
+                    union(
+                        span(5..11),
+                        vec![
+                            range(span(5..8), 'a', 'c'),
+                            range(span(8..11), 'x', 'z'),
+                        ]
+                    ),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a-cx-z&&\w]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..12),
+                negated: false,
+                kind: intersection(
+                    span(1..11),
+                    union(
+                        span(1..7),
+                        vec![
+                            range(span(1..4), 'a', 'c'),
+                            range(span(4..7), 'x', 'z'),
+                        ]
+                    ),
+                    itemset(item_perl(ast::ClassPerl {
+                        span: span(9..11),
+                        kind: ast::ClassPerlKind::Word,
+                        negated: false,
+                    })),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a--b--c]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..9),
+                negated: false,
+                kind: difference(
+                    span(1..8),
+                    difference(
+                        span(1..5),
+                        itemset(lit(span(1..2), 'a')),
+                        itemset(lit(span(4..5), 'b')),
+                    ),
+                    itemset(lit(span(7..8), 'c')),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[a~~b~~c]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..9),
+                negated: false,
+                kind: symdifference(
+                    span(1..8),
+                    symdifference(
+                        span(1..5),
+                        itemset(lit(span(1..2), 'a')),
+                        itemset(lit(span(4..5), 'b')),
+                    ),
+                    itemset(lit(span(7..8), 'c')),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\^&&^]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..7),
+                negated: false,
+                kind: intersection(
+                    span(1..6),
+                    itemset(ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..3),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '^',
+                    })),
+                    itemset(lit(span(5..6), '^')),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\&&&&]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..7),
+                negated: false,
+                kind: intersection(
+                    span(1..6),
+                    itemset(ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..3),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '&',
+                    })),
+                    itemset(lit(span(5..6), '&')),
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[&&&&]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: intersection(
+                    span(1..5),
+                    intersection(
+                        span(1..3),
+                        itemset(empty(span(1..1))),
+                        itemset(empty(span(3..3))),
+                    ),
+                    itemset(empty(span(5..5))),
+                ),
+            })))
+        );
+
+        let pat = "[☃-⛄]";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span_range(pat, 0..9),
+                negated: false,
+                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
+                    span: span_range(pat, 1..8),
+                    start: ast::Literal {
+                        span: span_range(pat, 1..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '☃',
+                    },
+                    end: ast::Literal {
+                        span: span_range(pat, 5..8),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '⛄',
+                    },
+                })),
+            })))
+        );
+
+        assert_eq!(
+            parser(r"[]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: itemset(lit(span(1..2), ']')),
+            })))
+        );
+        assert_eq!(
+            parser(r"[]\[]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: union(
+                    span(1..4),
+                    vec![
+                        lit(span(1..2), ']'),
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..4),
+                            kind: ast::LiteralKind::Punctuation,
+                            c: '[',
+                        }),
+                    ]
+                ),
+            })))
+        );
+        assert_eq!(
+            parser(r"[\[]]").parse(),
+            Ok(concat(
+                0..5,
+                vec![
+                    Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                        span: span(0..4),
+                        negated: false,
+                        kind: itemset(ast::ClassSetItem::Literal(
+                            ast::Literal {
+                                span: span(1..3),
+                                kind: ast::LiteralKind::Punctuation,
+                                c: '[',
+                            }
+                        )),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(4..5),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: ']',
+                    }),
+                ]
+            ))
+        );
+
+        assert_eq!(
+            parser("[").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[[").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[[-]").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[[[:alnum:]").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser(r"[\b]").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::ClassEscapeInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"[\w-a]").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::ClassRangeLiteral,
+            }
+        );
+        assert_eq!(
+            parser(r"[a-\w]").parse().unwrap_err(),
+            TestError {
+                span: span(3..5),
+                kind: ast::ErrorKind::ClassRangeLiteral,
+            }
+        );
+        assert_eq!(
+            parser(r"[z-a]").parse().unwrap_err(),
+            TestError {
+                span: span(1..4),
+                kind: ast::ErrorKind::ClassRangeInvalid,
+            }
+        );
+
+        assert_eq!(
+            parser_ignore_whitespace("[a ").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_set_class_open() {
+        assert_eq!(parser("[a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..1),
+                negated: false,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
+            Ok((set, union))
+        });
+        assert_eq!(
+            parser_ignore_whitespace("[   a]").parse_set_class_open(),
+            {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union =
+                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
+                Ok((set, union))
+            }
+        );
+        assert_eq!(parser("[^a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..2),
+                negated: true,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(2..2),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
+            Ok((set, union))
+        });
+        assert_eq!(
+            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
+            {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union =
+                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
+                Ok((set, union))
+            }
+        );
+        assert_eq!(parser("[-a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..2),
+                negated: false,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(1..2),
+                items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..2),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: '-',
+                })],
+            };
+            Ok((set, union))
+        });
+        assert_eq!(
+            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
+            {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '-',
+                    })],
+                };
+                Ok((set, union))
+            }
+        );
+        assert_eq!(parser("[^-a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..3),
+                negated: true,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(2..2),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(2..3),
+                items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(2..3),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: '-',
+                })],
+            };
+            Ok((set, union))
+        });
+        assert_eq!(parser("[--a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(1..3),
+                items: vec![
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..2),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '-',
+                    }),
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '-',
+                    }),
+                ],
+            };
+            Ok((set, union))
+        });
+        assert_eq!(parser("[]a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..2),
+                negated: false,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(1..2),
+                items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..2),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: ']',
+                })],
+            };
+            Ok((set, union))
+        });
+        assert_eq!(
+            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
+            {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: ']',
+                    })],
+                };
+                Ok((set, union))
+            }
+        );
+        assert_eq!(parser("[^]a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..3),
+                negated: true,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(2..2),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(2..3),
+                items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(2..3),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: ']',
+                })],
+            };
+            Ok((set, union))
+        });
+        assert_eq!(parser("[-]a]").parse_set_class_open(), {
+            let set = ast::ClassBracketed {
+                span: span(0..2),
+                negated: false,
+                kind: ast::ClassSet::union(ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                }),
+            };
+            let union = ast::ClassSetUnion {
+                span: span(1..2),
+                items: vec![ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..2),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: '-',
+                })],
+            };
+            Ok((set, union))
+        });
+
+        assert_eq!(
+            parser("[").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser_ignore_whitespace("[    ")
+                .parse_set_class_open()
+                .unwrap_err(),
+            TestError {
+                span: span(0..5),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[^").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[]").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[-").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+        assert_eq!(
+            parser("[--").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::ClassUnclosed,
+            }
+        );
+    }
+
+    #[test]
+    fn maybe_parse_ascii_class() {
+        assert_eq!(
+            parser(r"[:alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            })
+        );
+        assert_eq!(
+            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            })
+        );
+        assert_eq!(
+            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..10),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: true,
+            })
+        );
+
+        let p = parser(r"[:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:^");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[^:alnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnum]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnum:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+    }
+
+    #[test]
+    fn parse_unicode_class() {
+        assert_eq!(
+            parser(r"\pN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            }))
+        );
+        assert_eq!(
+            parser(r"\PN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: true,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            }))
+        );
+        assert_eq!(
+            parser(r"\P{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: true,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{Greek}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..9),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("Greek")),
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\p{scx:Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{scx=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{scx!=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..17),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\p{:}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s(""),
+                    value: s(""),
+                },
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s(""),
+                    value: s(""),
+                },
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{!=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..6),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s(""),
+                    value: s(""),
+                },
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\p").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\p{").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\p{N").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+        assert_eq!(
+            parser(r"\p{Greek").parse_escape().unwrap_err(),
+            TestError {
+                span: span(8..8),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            }
+        );
+
+        assert_eq!(
+            parser(r"\pNz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..3),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::OneLetter('N'),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            }))
+        );
+        assert_eq!(
+            parser(r"\p{Greek}z").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..10),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..9),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(9..10),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            }))
+        );
+        assert_eq!(
+            parser(r"\p\{").parse().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::UnicodeClassInvalid,
+            }
+        );
+        assert_eq!(
+            parser(r"\P\{").parse().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::UnicodeClassInvalid,
+            }
+        );
+    }
+
+    #[test]
+    fn parse_perl_class() {
+        assert_eq!(
+            parser(r"\d").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            }))
+        );
+        assert_eq!(
+            parser(r"\D").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: true,
+            }))
+        );
+        assert_eq!(
+            parser(r"\s").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: false,
+            }))
+        );
+        assert_eq!(
+            parser(r"\S").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: true,
+            }))
+        );
+        assert_eq!(
+            parser(r"\w").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: false,
+            }))
+        );
+        assert_eq!(
+            parser(r"\W").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: true,
+            }))
+        );
+
+        assert_eq!(
+            parser(r"\d").parse(),
+            Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            })))
+        );
+        assert_eq!(
+            parser(r"\dz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..3),
+                asts: vec![
+                    Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                        span: span(0..2),
+                        kind: ast::ClassPerlKind::Digit,
+                        negated: false,
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            }))
+        );
+    }
+
+    // This tests a bug fix where the nest limit checker wasn't decrementing
+    // its depth during post-traversal, which caused long regexes to trip
+    // the default limit too aggressively.
+    #[test]
+    fn regression_454_nest_too_big() {
+        let pattern = r#"
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}
+        "#;
+        assert!(parser_nest_limit(pattern, 50).parse().is_ok());
+    }
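+
+    // Editorial sketch (not part of the upstream crate): the comment above
+    // describes depth bookkeeping in the nest limit checker. The essence of
+    // the fix is symmetry: increment the depth on the pre-visit of a
+    // subexpression and decrement it on the post-visit, so that sibling
+    // subexpressions do not accumulate depth. `DepthSketch` below is a
+    // hypothetical stand-in, not the crate's actual checker.
+    #[test]
+    fn nest_depth_bookkeeping_sketch() {
+        struct DepthSketch {
+            depth: u32,
+            limit: u32,
+        }
+        impl DepthSketch {
+            // Pre-visit: one level deeper; fail once the limit is exceeded.
+            fn enter(&mut self) -> Result<(), ()> {
+                self.depth += 1;
+                if self.depth > self.limit {
+                    Err(())
+                } else {
+                    Ok(())
+                }
+            }
+            // Post-visit: back out one level. Omitting this decrement is
+            // the bug the regression test above guards against.
+            fn leave(&mut self) {
+                self.depth -= 1;
+            }
+        }
+
+        let mut d = DepthSketch { depth: 0, limit: 2 };
+        // Two sibling subexpressions, each one level deep: the limit is
+        // never tripped as long as `leave` runs after each one.
+        for _ in 0..2 {
+            assert!(d.enter().is_ok());
+            d.leave();
+        }
+        assert_eq!(d.depth, 0);
+    }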
+
+    // This tests that we treat a trailing `-` in a character class as a
+    // literal `-` even when whitespace mode is enabled and there is whitespace
+    // after the trailing `-`.
+    #[test]
+    fn regression_455_trailing_dash_ignore_whitespace() {
+        assert!(parser("(?x)[ / - ]").parse().is_ok());
+        assert!(parser("(?x)[ a - ]").parse().is_ok());
+        assert!(parser(
+            "(?x)[
+            a
+            - ]
+        "
+        )
+        .parse()
+        .is_ok());
+        assert!(parser(
+            "(?x)[
+            a # wat
+            - ]
+        "
+        )
+        .parse()
+        .is_ok());
+
+        assert!(parser("(?x)[ / -").parse().is_err());
+        assert!(parser("(?x)[ / - ").parse().is_err());
+        assert!(parser(
+            "(?x)[
+            / -
+        "
+        )
+        .parse()
+        .is_err());
+        assert!(parser(
+            "(?x)[
+            / - # wat
+        "
+        )
+        .parse()
+        .is_err());
+    }
+}
diff --git a/src/ast/print.rs b/src/ast/print.rs
new file mode 100644
index 0000000..1b9bc41
--- /dev/null
+++ b/src/ast/print.rs
@@ -0,0 +1,569 @@
+/*!
+This module provides a regular expression printer for `Ast`.
+*/
+
+use std::fmt;
+
+use ast::visitor::{self, Visitor};
+use ast::{self, Ast};
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder::new()
+    }
+}
+
+impl PrinterBuilder {
+    fn new() -> PrinterBuilder {
+        PrinterBuilder { _priv: () }
+    }
+
+    fn build(&self) -> Printer {
+        Printer { _priv: () }
+    }
+}
+
+/// A printer for a regular expression abstract syntax tree.
+///
+/// A printer converts an abstract syntax tree (AST) to a regular expression
+/// pattern string. This particular printer uses constant stack space and heap
+/// space proportional to the size of the AST.
+///
+/// This printer will not necessarily preserve the original formatting of the
+/// regular expression pattern string. For example, all whitespace and comments
+/// are ignored.
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Print the given `Ast` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
+    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
+        visitor::visit(ast, Writer { printer: self, wtr: wtr })
+    }
+}
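+
+// As a rough usage sketch (mirroring the round-trip tests at the bottom of
+// this file), an `Ast` produced by `ast::parse::ParserBuilder` can be printed
+// back to a pattern string like so:
+//
+//     let ast = ast::parse::ParserBuilder::new().build().parse(r"a|b").unwrap();
+//     let mut printer = Printer::new();
+//     let mut dst = String::new();
+//     printer.print(&ast, &mut dst).unwrap();
+//     assert_eq!(dst, r"a|b");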
+
+#[derive(Debug)]
+struct Writer<'p, W> {
+    printer: &'p mut Printer,
+    wtr: W,
+}
+
+impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
+        match *ast {
+            Ast::Group(ref x) => self.fmt_group_pre(x),
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                self.fmt_class_bracketed_pre(x)
+            }
+            _ => Ok(()),
+        }
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
+        use ast::Class;
+
+        match *ast {
+            Ast::Empty(_) => Ok(()),
+            Ast::Flags(ref x) => self.fmt_set_flags(x),
+            Ast::Literal(ref x) => self.fmt_literal(x),
+            Ast::Dot(_) => self.wtr.write_str("."),
+            Ast::Assertion(ref x) => self.fmt_assertion(x),
+            Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
+            Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
+            Ast::Class(Class::Bracketed(ref x)) => {
+                self.fmt_class_bracketed_post(x)
+            }
+            Ast::Repetition(ref x) => self.fmt_repetition(x),
+            Ast::Group(ref x) => self.fmt_group_post(x),
+            Ast::Alternation(_) => Ok(()),
+            Ast::Concat(_) => Ok(()),
+        }
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(ref x) => {
+                self.fmt_class_bracketed_pre(x)
+            }
+            _ => Ok(()),
+        }
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        use ast::ClassSetItem::*;
+
+        match *ast {
+            Empty(_) => Ok(()),
+            Literal(ref x) => self.fmt_literal(x),
+            Range(ref x) => {
+                self.fmt_literal(&x.start)?;
+                self.wtr.write_str("-")?;
+                self.fmt_literal(&x.end)?;
+                Ok(())
+            }
+            Ascii(ref x) => self.fmt_class_ascii(x),
+            Unicode(ref x) => self.fmt_class_unicode(x),
+            Perl(ref x) => self.fmt_class_perl(x),
+            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
+            Union(_) => Ok(()),
+        }
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        self.fmt_class_set_binary_op_kind(&ast.kind)
+    }
+}
+
+impl<'p, W: fmt::Write> Writer<'p, W> {
+    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
+        use ast::GroupKind::*;
+        match ast.kind {
+            CaptureIndex(_) => self.wtr.write_str("("),
+            CaptureName(ref x) => {
+                self.wtr.write_str("(?P<")?;
+                self.wtr.write_str(&x.name)?;
+                self.wtr.write_str(">")?;
+                Ok(())
+            }
+            NonCapturing(ref flags) => {
+                self.wtr.write_str("(?")?;
+                self.fmt_flags(flags)?;
+                self.wtr.write_str(":")?;
+                Ok(())
+            }
+        }
+    }
+
+    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
+        self.wtr.write_str(")")
+    }
+
+    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
+        use ast::RepetitionKind::*;
+        match ast.op.kind {
+            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
+            ZeroOrOne => self.wtr.write_str("??"),
+            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
+            ZeroOrMore => self.wtr.write_str("*?"),
+            OneOrMore if ast.greedy => self.wtr.write_str("+"),
+            OneOrMore => self.wtr.write_str("+?"),
+            Range(ref x) => {
+                self.fmt_repetition_range(x)?;
+                if !ast.greedy {
+                    self.wtr.write_str("?")?;
+                }
+                Ok(())
+            }
+        }
+    }
+
+    fn fmt_repetition_range(
+        &mut self,
+        ast: &ast::RepetitionRange,
+    ) -> fmt::Result {
+        use ast::RepetitionRange::*;
+        match *ast {
+            Exactly(x) => write!(self.wtr, "{{{}}}", x),
+            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
+            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
+        }
+    }
+
+    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
+        use ast::LiteralKind::*;
+
+        match ast.kind {
+            Verbatim => self.wtr.write_char(ast.c),
+            Punctuation => write!(self.wtr, r"\{}", ast.c),
+            Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
+            HexFixed(ast::HexLiteralKind::X) => {
+                write!(self.wtr, r"\x{:02X}", ast.c as u32)
+            }
+            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
+                write!(self.wtr, r"\u{:04X}", ast.c as u32)
+            }
+            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
+                write!(self.wtr, r"\U{:08X}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::X) => {
+                write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
+                write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
+                write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
+            }
+            Special(ast::SpecialLiteralKind::Bell) => {
+                self.wtr.write_str(r"\a")
+            }
+            Special(ast::SpecialLiteralKind::FormFeed) => {
+                self.wtr.write_str(r"\f")
+            }
+            Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
+            Special(ast::SpecialLiteralKind::LineFeed) => {
+                self.wtr.write_str(r"\n")
+            }
+            Special(ast::SpecialLiteralKind::CarriageReturn) => {
+                self.wtr.write_str(r"\r")
+            }
+            Special(ast::SpecialLiteralKind::VerticalTab) => {
+                self.wtr.write_str(r"\v")
+            }
+            Special(ast::SpecialLiteralKind::Space) => {
+                self.wtr.write_str(r"\ ")
+            }
+        }
+    }
+
+    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
+        use ast::AssertionKind::*;
+        match ast.kind {
+            StartLine => self.wtr.write_str("^"),
+            EndLine => self.wtr.write_str("$"),
+            StartText => self.wtr.write_str(r"\A"),
+            EndText => self.wtr.write_str(r"\z"),
+            WordBoundary => self.wtr.write_str(r"\b"),
+            NotWordBoundary => self.wtr.write_str(r"\B"),
+        }
+    }
+
+    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
+        self.wtr.write_str("(?")?;
+        self.fmt_flags(&ast.flags)?;
+        self.wtr.write_str(")")?;
+        Ok(())
+    }
+
+    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
+        use ast::{Flag, FlagsItemKind};
+
+        for item in &ast.items {
+            match item.kind {
+                FlagsItemKind::Negation => self.wtr.write_str("-"),
+                FlagsItemKind::Flag(ref flag) => match *flag {
+                    Flag::CaseInsensitive => self.wtr.write_str("i"),
+                    Flag::MultiLine => self.wtr.write_str("m"),
+                    Flag::DotMatchesNewLine => self.wtr.write_str("s"),
+                    Flag::SwapGreed => self.wtr.write_str("U"),
+                    Flag::Unicode => self.wtr.write_str("u"),
+                    Flag::IgnoreWhitespace => self.wtr.write_str("x"),
+                },
+            }?;
+        }
+        Ok(())
+    }
+
+    fn fmt_class_bracketed_pre(
+        &mut self,
+        ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        if ast.negated {
+            self.wtr.write_str("[^")
+        } else {
+            self.wtr.write_str("[")
+        }
+    }
+
+    fn fmt_class_bracketed_post(
+        &mut self,
+        _ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        self.wtr.write_str("]")
+    }
+
+    fn fmt_class_set_binary_op_kind(
+        &mut self,
+        ast: &ast::ClassSetBinaryOpKind,
+    ) -> fmt::Result {
+        use ast::ClassSetBinaryOpKind::*;
+        match *ast {
+            Intersection => self.wtr.write_str("&&"),
+            Difference => self.wtr.write_str("--"),
+            SymmetricDifference => self.wtr.write_str("~~"),
+        }
+    }
+
+    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
+        use ast::ClassPerlKind::*;
+        match ast.kind {
+            Digit if ast.negated => self.wtr.write_str(r"\D"),
+            Digit => self.wtr.write_str(r"\d"),
+            Space if ast.negated => self.wtr.write_str(r"\S"),
+            Space => self.wtr.write_str(r"\s"),
+            Word if ast.negated => self.wtr.write_str(r"\W"),
+            Word => self.wtr.write_str(r"\w"),
+        }
+    }
+
+    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
+        use ast::ClassAsciiKind::*;
+        match ast.kind {
+            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
+            Alnum => self.wtr.write_str("[:alnum:]"),
+            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
+            Alpha => self.wtr.write_str("[:alpha:]"),
+            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
+            Ascii => self.wtr.write_str("[:ascii:]"),
+            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
+            Blank => self.wtr.write_str("[:blank:]"),
+            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
+            Cntrl => self.wtr.write_str("[:cntrl:]"),
+            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
+            Digit => self.wtr.write_str("[:digit:]"),
+            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
+            Graph => self.wtr.write_str("[:graph:]"),
+            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
+            Lower => self.wtr.write_str("[:lower:]"),
+            Print if ast.negated => self.wtr.write_str("[:^print:]"),
+            Print => self.wtr.write_str("[:print:]"),
+            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
+            Punct => self.wtr.write_str("[:punct:]"),
+            Space if ast.negated => self.wtr.write_str("[:^space:]"),
+            Space => self.wtr.write_str("[:space:]"),
+            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
+            Upper => self.wtr.write_str("[:upper:]"),
+            Word if ast.negated => self.wtr.write_str("[:^word:]"),
+            Word => self.wtr.write_str("[:word:]"),
+            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
+            Xdigit => self.wtr.write_str("[:xdigit:]"),
+        }
+    }
+
+    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
+        use ast::ClassUnicodeKind::*;
+        use ast::ClassUnicodeOpKind::*;
+
+        if ast.negated {
+            self.wtr.write_str(r"\P")?;
+        } else {
+            self.wtr.write_str(r"\p")?;
+        }
+        match ast.kind {
+            OneLetter(c) => self.wtr.write_char(c),
+            Named(ref x) => write!(self.wtr, "{{{}}}", x),
+            NamedValue { op: Equal, ref name, ref value } => {
+                write!(self.wtr, "{{{}={}}}", name, value)
+            }
+            NamedValue { op: Colon, ref name, ref value } => {
+                write!(self.wtr, "{{{}:{}}}", name, value)
+            }
+            NamedValue { op: NotEqual, ref name, ref value } => {
+                write!(self.wtr, "{{{}!={}}}", name, value)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Printer;
+    use ast::parse::ParserBuilder;
+
+    fn roundtrip(given: &str) {
+        roundtrip_with(|b| b, given);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str)
+    where
+        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+        let ast = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&ast, &mut dst).unwrap();
+        assert_eq!(given, dst);
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a");
+        roundtrip(r"\[");
+        roundtrip_with(|b| b.octal(true), r"\141");
+        roundtrip(r"\x61");
+        roundtrip(r"\x7F");
+        roundtrip(r"\u0061");
+        roundtrip(r"\U00000061");
+        roundtrip(r"\x{61}");
+        roundtrip(r"\x{7F}");
+        roundtrip(r"\u{61}");
+        roundtrip(r"\U{61}");
+
+        roundtrip(r"\a");
+        roundtrip(r"\f");
+        roundtrip(r"\t");
+        roundtrip(r"\n");
+        roundtrip(r"\r");
+        roundtrip(r"\v");
+        roundtrip(r"(?x)\ ");
+    }
+
+    #[test]
+    fn print_dot() {
+        roundtrip(".");
+    }
+
+    #[test]
+    fn print_concat() {
+        roundtrip("ab");
+        roundtrip("abcde");
+        roundtrip("a(bcd)ef");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("a|b");
+        roundtrip("a|b|c|d|e");
+        roundtrip("|a|b|c|d|e");
+        roundtrip("|a|b|c|d|e|");
+        roundtrip("a(b|c|d)|e|f");
+    }
+
+    #[test]
+    fn print_assertion() {
+        roundtrip(r"^");
+        roundtrip(r"$");
+        roundtrip(r"\A");
+        roundtrip(r"\z");
+        roundtrip(r"\b");
+        roundtrip(r"\B");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?");
+        roundtrip("a??");
+        roundtrip("a*");
+        roundtrip("a*?");
+        roundtrip("a+");
+        roundtrip("a+?");
+        roundtrip("a{5}");
+        roundtrip("a{5}?");
+        roundtrip("a{5,}");
+        roundtrip("a{5,}?");
+        roundtrip("a{5,10}");
+        roundtrip("a{5,10}?");
+    }
+
+    #[test]
+    fn print_flags() {
+        roundtrip("(?i)");
+        roundtrip("(?-i)");
+        roundtrip("(?s-i)");
+        roundtrip("(?-si)");
+        roundtrip("(?siUmux)");
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("(?i:a)");
+        roundtrip("(?P<foo>a)");
+        roundtrip("(a)");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[abc]");
+        roundtrip(r"[a-z]");
+        roundtrip(r"[^a-z]");
+        roundtrip(r"[a-z0-9]");
+        roundtrip(r"[-a-z0-9]");
+        roundtrip(r"[-a-z0-9]");
+        roundtrip(r"[a-z0-9---]");
+        roundtrip(r"[a-z&&m-n]");
+        roundtrip(r"[[a-z&&m-n]]");
+        roundtrip(r"[a-z--m-n]");
+        roundtrip(r"[a-z~~m-n]");
+        roundtrip(r"[a-z[0-9]]");
+        roundtrip(r"[a-z[^0-9]]");
+
+        roundtrip(r"\d");
+        roundtrip(r"\D");
+        roundtrip(r"\s");
+        roundtrip(r"\S");
+        roundtrip(r"\w");
+        roundtrip(r"\W");
+
+        roundtrip(r"[[:alnum:]]");
+        roundtrip(r"[[:^alnum:]]");
+        roundtrip(r"[[:alpha:]]");
+        roundtrip(r"[[:^alpha:]]");
+        roundtrip(r"[[:ascii:]]");
+        roundtrip(r"[[:^ascii:]]");
+        roundtrip(r"[[:blank:]]");
+        roundtrip(r"[[:^blank:]]");
+        roundtrip(r"[[:cntrl:]]");
+        roundtrip(r"[[:^cntrl:]]");
+        roundtrip(r"[[:digit:]]");
+        roundtrip(r"[[:^digit:]]");
+        roundtrip(r"[[:graph:]]");
+        roundtrip(r"[[:^graph:]]");
+        roundtrip(r"[[:lower:]]");
+        roundtrip(r"[[:^lower:]]");
+        roundtrip(r"[[:print:]]");
+        roundtrip(r"[[:^print:]]");
+        roundtrip(r"[[:punct:]]");
+        roundtrip(r"[[:^punct:]]");
+        roundtrip(r"[[:space:]]");
+        roundtrip(r"[[:^space:]]");
+        roundtrip(r"[[:upper:]]");
+        roundtrip(r"[[:^upper:]]");
+        roundtrip(r"[[:word:]]");
+        roundtrip(r"[[:^word:]]");
+        roundtrip(r"[[:xdigit:]]");
+        roundtrip(r"[[:^xdigit:]]");
+
+        roundtrip(r"\pL");
+        roundtrip(r"\PL");
+        roundtrip(r"\p{L}");
+        roundtrip(r"\P{L}");
+        roundtrip(r"\p{X=Y}");
+        roundtrip(r"\P{X=Y}");
+        roundtrip(r"\p{X:Y}");
+        roundtrip(r"\P{X:Y}");
+        roundtrip(r"\p{X!=Y}");
+        roundtrip(r"\P{X!=Y}");
+    }
+}
diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs
new file mode 100644
index 0000000..3eaa4b0
--- /dev/null
+++ b/src/ast/visitor.rs
@@ -0,0 +1,519 @@
+use std::fmt;
+
+use ast::{self, Ast};
+
+/// A trait for visiting an abstract syntax tree (AST) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on an abstract syntax tree without necessarily using recursion.
+/// In particular, this permits callers to do case analysis with constant stack
+/// usage, which can be important since the size of an abstract syntax tree
+/// may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Note that the abstract syntax tree for a regular expression is quite
+/// complex. Unless you specifically need it, you might be able to use the
+/// much simpler
+/// [high-level intermediate representation](../hir/struct.Hir.html)
+/// and its
+/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
+/// instead.
+pub trait Visitor {
+    /// The result of visiting an AST.
+    type Output;
+    /// An error that visiting an AST might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the AST or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the AST.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Ast` before descending into child `Ast`
+    /// nodes.
+    fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Ast` after descending all of its child
+    /// `Ast` nodes.
+    fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an
+    /// [`Alternation`](struct.Alternation.html).
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// before descending into child nodes.
+    fn visit_class_set_item_pre(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// after descending into child nodes.
+    fn visit_class_set_item_post(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// before descending into child nodes.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// after descending into child nodes.
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between the left hand and right hand child nodes
+    /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Ast` while calling the
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this method is when one wants to perform case
+/// analysis over an `Ast` without using a stack size proportional to the depth
+/// of the `Ast`. Namely, this method will instead use constant stack size, but
+/// will use heap space proportional to the size of the `Ast`. This may be
+/// desirable in cases where the size of `Ast` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
+pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(ast, visitor)
+}
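+
+// As a rough sketch of a `Visitor` implementation, a caller might count the
+// number of literal nodes in an `Ast` along these lines (only `finish` and
+// the associated types are required; the trait's other methods default to
+// no-ops):
+//
+//     struct LiteralCounter(usize);
+//
+//     impl Visitor for LiteralCounter {
+//         type Output = usize;
+//         type Err = ();
+//
+//         fn finish(self) -> Result<usize, ()> {
+//             Ok(self.0)
+//         }
+//
+//         fn visit_pre(&mut self, ast: &Ast) -> Result<(), ()> {
+//             if let Ast::Literal(_) = *ast {
+//                 self.0 += 1;
+//             }
+//             Ok(())
+//         }
+//     }
+//
+//     // `some_ast` stands in for any previously parsed `Ast`.
+//     let count = visit(&some_ast, LiteralCounter(0)).unwrap();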
+
+/// HeapVisitor visits every item in an `Ast` in depth first order using
+/// constant stack size and heap space proportional to the size of the `Ast`.
+struct HeapVisitor<'a> {
+    /// A stack of `Ast` nodes. This is roughly analogous to the call stack
+    /// used in a typical recursive visitor.
+    stack: Vec<(&'a Ast, Frame<'a>)>,
+    /// Similar to the `Ast` stack above, but is used only for character
+    /// classes. In particular, character classes embed their own mini
+    /// recursive syntax.
+    stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Ast`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a ast::Repetition),
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a ast::Group),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// a character class.
+enum ClassFrame<'a> {
+    /// The stack frame used while visiting every child node of a union of
+    /// character class items.
+    Union {
+        /// The child node we are currently visiting.
+        head: &'a ast::ClassSetItem,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [ast::ClassSetItem],
+    },
+    /// The stack frame used while visiting a binary class operation.
+    Binary { op: &'a ast::ClassSetBinaryOp },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// left hand child node.
+    BinaryLHS {
+        op: &'a ast::ClassSetBinaryOp,
+        lhs: &'a ast::ClassSet,
+        rhs: &'a ast::ClassSet,
+    },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// right hand child node.
+    BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
+}
+
+/// A representation of the inductive step when performing structural induction
+/// over a character class.
+///
+/// Note that there is no analogous explicit type for the inductive step for
+/// `Ast` nodes because the inductive step is just an `Ast`. For character
+/// classes, the inductive step can produce one of two possible child nodes:
+/// an item or a binary operation. (An item cannot be a binary operation
+/// because that would imply binary operations can be unioned in the concrete
+/// syntax, which is not possible.)
+enum ClassInduct<'a> {
+    Item(&'a ast::ClassSetItem),
+    BinaryOp(&'a ast::ClassSetBinaryOp),
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![], stack_class: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut ast: &'a Ast,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+        self.stack_class.clear();
+
+        visitor.start();
+        loop {
+            visitor.visit_pre(ast)?;
+            if let Some(x) = self.induct(ast, &mut visitor)? {
+                let child = x.child();
+                self.stack.push((ast, x));
+                ast = child;
+                continue;
+            }
+            // No induction means we have a base case, so we can post visit
+            // it now.
+            visitor.visit_post(ast)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack.pop() {
+                    None => return visitor.finish(),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a concat/alternate, then we might have additional
+                // inductive steps to process.
+                if let Some(x) = self.pop(frame) {
+                    if let Frame::Alternation { .. } = x {
+                        visitor.visit_alternation_in()?;
+                    }
+                    ast = x.child();
+                    self.stack.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this AST, so we can post visit it now.
+                visitor.visit_post(post_ast)?;
+            }
+        }
+    }
+
+    /// Build a stack frame for the given AST if one is needed (which occurs if
+    /// and only if there are child nodes in the AST). Otherwise, return None.
+    ///
+    /// If this visits a class, then the underlying visitor implementation may
+    /// return an error which will be passed on here.
+    fn induct<V: Visitor>(
+        &mut self,
+        ast: &'a Ast,
+        visitor: &mut V,
+    ) -> Result<Option<Frame<'a>>, V::Err> {
+        Ok(match *ast {
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                self.visit_class(x, visitor)?;
+                None
+            }
+            Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
+            Ast::Group(ref x) => Some(Frame::Group(x)),
+            Ast::Concat(ref x) if x.asts.is_empty() => None,
+            Ast::Concat(ref x) => {
+                Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => None,
+            Ast::Alternation(ref x) => Some(Frame::Alternation {
+                head: &x.asts[0],
+                tail: &x.asts[1..],
+            }),
+            _ => None,
+        })
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None,
+            Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+
+    fn visit_class<V: Visitor>(
+        &mut self,
+        ast: &'a ast::ClassBracketed,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        let mut ast = ClassInduct::from_bracketed(ast);
+        loop {
+            self.visit_class_pre(&ast, visitor)?;
+            if let Some(x) = self.induct_class(&ast) {
+                let child = x.child();
+                self.stack_class.push((ast, x));
+                ast = child;
+                continue;
+            }
+            self.visit_class_post(&ast, visitor)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack_class.pop() {
+                    None => return Ok(()),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a union or a binary op, then we might have
+                // additional inductive steps to process.
+                if let Some(x) = self.pop_class(frame) {
+                    if let ClassFrame::BinaryRHS { ref op, .. } = x {
+                        visitor.visit_class_set_binary_op_in(op)?;
+                    }
+                    ast = x.child();
+                    self.stack_class.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this class node, so we can post visit it now.
+                self.visit_class_post(&post_ast, visitor)?;
+            }
+        }
+    }
+
+    /// Call the appropriate `Visitor` methods given an inductive step.
+    fn visit_class_pre<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                visitor.visit_class_set_item_pre(item)?;
+            }
+            ClassInduct::BinaryOp(op) => {
+                visitor.visit_class_set_binary_op_pre(op)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Call the appropriate `Visitor` methods given an inductive step.
+    fn visit_class_post<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                visitor.visit_class_set_item_post(item)?;
+            }
+            ClassInduct::BinaryOp(op) => {
+                visitor.visit_class_set_binary_op_post(op)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Build a stack frame for the given class node if one is needed (which
+    /// occurs if and only if there are child nodes). Otherwise, return None.
+    fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
+        match *ast {
+            ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
+                match x.kind {
+                    ast::ClassSet::Item(ref item) => {
+                        Some(ClassFrame::Union { head: item, tail: &[] })
+                    }
+                    ast::ClassSet::BinaryOp(ref op) => {
+                        Some(ClassFrame::Binary { op: op })
+                    }
+                }
+            }
+            ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
+                if x.items.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &x.items[0],
+                        tail: &x.items[1..],
+                    })
+                }
+            }
+            ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS {
+                op: op,
+                lhs: &op.lhs,
+                rhs: &op.rhs,
+            }),
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
+        match induct {
+            ClassFrame::Union { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            ClassFrame::Binary { .. } => None,
+            ClassFrame::BinaryLHS { op, rhs, .. } => {
+                Some(ClassFrame::BinaryRHS { op: op, rhs: rhs })
+            }
+            ClassFrame::BinaryRHS { .. } => None,
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child AST node to visit.
+    fn child(&self) -> &'a Ast {
+        match *self {
+            Frame::Repetition(rep) => &rep.ast,
+            Frame::Group(group) => &group.ast,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}
+
+impl<'a> ClassFrame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child class node to visit.
+    fn child(&self) -> ClassInduct<'a> {
+        match *self {
+            ClassFrame::Union { head, .. } => ClassInduct::Item(head),
+            ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
+            ClassFrame::BinaryLHS { ref lhs, .. } => {
+                ClassInduct::from_set(lhs)
+            }
+            ClassFrame::BinaryRHS { ref rhs, .. } => {
+                ClassInduct::from_set(rhs)
+            }
+        }
+    }
+}
+
+impl<'a> ClassInduct<'a> {
+    fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
+        ClassInduct::from_set(&ast.kind)
+    }
+
+    fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
+        match *ast {
+            ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
+            ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
+        }
+    }
+}
+
+impl<'a> fmt::Debug for ClassFrame<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self {
+            ClassFrame::Union { .. } => "Union",
+            ClassFrame::Binary { .. } => "Binary",
+            ClassFrame::BinaryLHS { .. } => "BinaryLHS",
+            ClassFrame::BinaryRHS { .. } => "BinaryRHS",
+        };
+        write!(f, "{}", x)
+    }
+}
+
+impl<'a> fmt::Debug for ClassInduct<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self {
+            ClassInduct::Item(it) => match *it {
+                ast::ClassSetItem::Empty(_) => "Item(Empty)",
+                ast::ClassSetItem::Literal(_) => "Item(Literal)",
+                ast::ClassSetItem::Range(_) => "Item(Range)",
+                ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
+                ast::ClassSetItem::Perl(_) => "Item(Perl)",
+                ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
+                ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
+                ast::ClassSetItem::Union(_) => "Item(Union)",
+            },
+            ClassInduct::BinaryOp(it) => match it.kind {
+                ast::ClassSetBinaryOpKind::Intersection => {
+                    "BinaryOp(Intersection)"
+                }
+                ast::ClassSetBinaryOpKind::Difference => {
+                    "BinaryOp(Difference)"
+                }
+                ast::ClassSetBinaryOpKind::SymmetricDifference => {
+                    "BinaryOp(SymmetricDifference)"
+                }
+            },
+        };
+        write!(f, "{}", x)
+    }
+}
diff --git a/src/either.rs b/src/either.rs
new file mode 100644
index 0000000..7ae41e4
--- /dev/null
+++ b/src/either.rs
@@ -0,0 +1,8 @@
+/// A simple binary sum type.
+///
+/// This is occasionally useful in an ad hoc fashion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Either<Left, Right> {
+    Left(Left),
+    Right(Right),
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..93c2b0d
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,324 @@
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+
+use ast;
+use hir;
+
+/// A type alias for dealing with errors returned by this crate.
+pub type Result<T> = result::Result<T, Error>;
+
+/// This error type encompasses any error that can be returned by this crate.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Error {
+    /// An error that occurred while translating concrete syntax into abstract
+    /// syntax (AST).
+    Parse(ast::Error),
+    /// An error that occurred while translating abstract syntax into a high
+    /// level intermediate representation (HIR).
+    Translate(hir::Error),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl From<ast::Error> for Error {
+    fn from(err: ast::Error) -> Error {
+        Error::Parse(err)
+    }
+}
+
+impl From<hir::Error> for Error {
+    fn from(err: hir::Error) -> Error {
+        Error::Translate(err)
+    }
+}
+
+impl error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        match *self {
+            Error::Parse(ref x) => x.description(),
+            Error::Translate(ref x) => x.description(),
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Parse(ref x) => x.fmt(f),
+            Error::Translate(ref x) => x.fmt(f),
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// A helper type for formatting nice error messages.
+///
+/// This type is responsible for reporting regex parse errors in a nice human
+/// readable format. Most of its complexity is from interspersing notational
+/// markers pointing out the position where an error occurred.
+#[derive(Debug)]
+pub struct Formatter<'e, E: 'e> {
+    /// The original regex pattern in which the error occurred.
+    pattern: &'e str,
+    /// The error kind. It must impl fmt::Display.
+    err: &'e E,
+    /// The primary span of the error.
+    span: &'e ast::Span,
+    /// An auxiliary and optional span, in case the error needs to point to
+    /// two locations (e.g., when reporting a duplicate capture group name).
+    aux_span: Option<&'e ast::Span>,
+}
+
+impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
+    fn from(err: &'e ast::Error) -> Self {
+        Formatter {
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: err.auxiliary_span(),
+        }
+    }
+}
+
+impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
+    fn from(err: &'e hir::Error) -> Self {
+        Formatter {
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: None,
+        }
+    }
+}
+
+impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let spans = Spans::from_formatter(self);
+        if self.pattern.contains('\n') {
+            let divider = repeat_char('~', 79);
+
+            writeln!(f, "regex parse error:")?;
+            writeln!(f, "{}", divider)?;
+            let notated = spans.notate();
+            write!(f, "{}", notated)?;
+            writeln!(f, "{}", divider)?;
+            // If we have error spans that cover multiple lines, then we just
+            // note the line numbers.
+            if !spans.multi_line.is_empty() {
+                let mut notes = vec![];
+                for span in &spans.multi_line {
+                    notes.push(format!(
+                        "on line {} (column {}) through line {} (column {})",
+                        span.start.line,
+                        span.start.column,
+                        span.end.line,
+                        span.end.column - 1
+                    ));
+                }
+                writeln!(f, "{}", notes.join("\n"))?;
+            }
+            write!(f, "error: {}", self.err)?;
+        } else {
+            writeln!(f, "regex parse error:")?;
+            let notated = Spans::from_formatter(self).notate();
+            write!(f, "{}", notated)?;
+            write!(f, "error: {}", self.err)?;
+        }
+        Ok(())
+    }
+}
+
+/// This type represents an arbitrary number of error spans in a way that makes
+/// it convenient to notate the regex pattern. ("Notate" means "point out
+/// exactly where the error occurred in the regex pattern.")
+///
+/// Technically, we can only ever have two spans given our current error
+/// structure. However, after toiling with a specific algorithm for handling
+/// two spans, it became obvious that an algorithm to handle an arbitrary
+/// number of spans was actually much simpler.
+struct Spans<'p> {
+    /// The original regex pattern string.
+    pattern: &'p str,
+    /// The total width that should be used for line numbers. The width is
+    /// used for left padding the line numbers for alignment.
+    ///
+    /// A value of `0` means line numbers should not be displayed. That is,
+    /// the pattern is itself only one line.
+    line_number_width: usize,
+    /// All error spans that occur on a single line. This sequence always has
+    /// length equivalent to the number of lines in `pattern`, where the index
+    /// of the sequence represents a line number, starting at `0`. The spans
+    /// in each line are sorted in ascending order.
+    by_line: Vec<Vec<ast::Span>>,
+    /// All error spans that occur over one or more lines. That is, the start
+    /// and end position of the span have different line numbers. The spans are
+    /// sorted in ascending order.
+    multi_line: Vec<ast::Span>,
+}
+
+impl<'p> Spans<'p> {
+    /// Build a sequence of spans from a formatter.
+    fn from_formatter<'e, E: fmt::Display>(
+        fmter: &'p Formatter<'e, E>,
+    ) -> Spans<'p> {
+        let mut line_count = fmter.pattern.lines().count();
+        // If the pattern ends with a `\n` literal, then our line count is
+        // off by one, since a span can occur immediately after the last `\n`,
+        // which is considered to be an additional line.
+        if fmter.pattern.ends_with('\n') {
+            line_count += 1;
+        }
+        let line_number_width =
+            if line_count <= 1 { 0 } else { line_count.to_string().len() };
+        let mut spans = Spans {
+            pattern: &fmter.pattern,
+            line_number_width: line_number_width,
+            by_line: vec![vec![]; line_count],
+            multi_line: vec![],
+        };
+        spans.add(fmter.span.clone());
+        if let Some(span) = fmter.aux_span {
+            spans.add(span.clone());
+        }
+        spans
+    }
+
+    /// Add the given span to this sequence, putting it in the right place.
+    fn add(&mut self, span: ast::Span) {
+        // This is grossly inefficient since we sort after each add, but right
+        // now, we only ever add two spans at most.
+        if span.is_one_line() {
+            let i = span.start.line - 1; // because lines are 1-indexed
+            self.by_line[i].push(span);
+            self.by_line[i].sort();
+        } else {
+            self.multi_line.push(span);
+            self.multi_line.sort();
+        }
+    }
+
+    /// Notate the pattern string with carets (`^`) pointing at each span
+    /// location. This only applies to spans that occur within a single line.
+    fn notate(&self) -> String {
+        let mut notated = String::new();
+        for (i, line) in self.pattern.lines().enumerate() {
+            if self.line_number_width > 0 {
+                notated.push_str(&self.left_pad_line_number(i + 1));
+                notated.push_str(": ");
+            } else {
+                notated.push_str("    ");
+            }
+            notated.push_str(line);
+            notated.push('\n');
+            if let Some(notes) = self.notate_line(i) {
+                notated.push_str(&notes);
+                notated.push('\n');
+            }
+        }
+        notated
+    }
+
+    /// Return notes for the line indexed at `i` (zero-based). If there are no
+    /// spans for the given line, then `None` is returned. Otherwise, an
+    /// appropriately space padded string with correctly positioned `^` is
+    /// returned, accounting for line numbers.
+    fn notate_line(&self, i: usize) -> Option<String> {
+        let spans = &self.by_line[i];
+        if spans.is_empty() {
+            return None;
+        }
+        let mut notes = String::new();
+        for _ in 0..self.line_number_padding() {
+            notes.push(' ');
+        }
+        let mut pos = 0;
+        for span in spans {
+            for _ in pos..(span.start.column - 1) {
+                notes.push(' ');
+                pos += 1;
+            }
+            let note_len = span.end.column.saturating_sub(span.start.column);
+            for _ in 0..cmp::max(1, note_len) {
+                notes.push('^');
+                pos += 1;
+            }
+        }
+        Some(notes)
+    }
+
+    /// Left pad the given line number with spaces such that it is aligned with
+    /// other line numbers.
+    fn left_pad_line_number(&self, n: usize) -> String {
+        let n = n.to_string();
+        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
+        let mut result = repeat_char(' ', pad);
+        result.push_str(&n);
+        result
+    }
+
+    /// Return the line number padding beginning at the start of each line of
+    /// the pattern.
+    ///
+    /// If the pattern is only one line, then this returns a fixed padding
+    /// for visual indentation.
+    fn line_number_padding(&self) -> usize {
+        if self.line_number_width == 0 {
+            4
+        } else {
+            2 + self.line_number_width
+        }
+    }
+}
+
+fn repeat_char(c: char, count: usize) -> String {
+    ::std::iter::repeat(c).take(count).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::parse::Parser;
+
+    fn assert_panic_message(pattern: &str, expected_msg: &str) -> () {
+        let result = Parser::new().parse(pattern);
+        match result {
+            Ok(_) => {
+                panic!("regex should not have parsed");
+            }
+            Err(err) => {
+                assert_eq!(err.to_string(), expected_msg.trim());
+            }
+        }
+    }
+
+    // See: https://github.com/rust-lang/regex/issues/464
+    #[test]
+    fn regression_464() {
+        let err = Parser::new().parse("a{\n").unwrap_err();
+        // This test checks that the error formatter doesn't panic.
+        assert!(!err.to_string().is_empty());
+    }
+
+    // See: https://github.com/rust-lang/regex/issues/545
+    #[test]
+    fn repetition_quantifier_expects_a_valid_decimal() {
+        assert_panic_message(
+            r"\\u{[^}]*}",
+            r#"
+regex parse error:
+    \\u{[^}]*}
+        ^
+error: repetition quantifier expects a valid decimal
+"#,
+        );
+    }
+}
diff --git a/src/hir/interval.rs b/src/hir/interval.rs
new file mode 100644
index 0000000..51eed52
--- /dev/null
+++ b/src/hir/interval.rs
@@ -0,0 +1,520 @@
+use std::char;
+use std::cmp;
+use std::fmt::Debug;
+use std::slice;
+use std::u8;
+
+use unicode;
+
+// This module contains an *internal* implementation of interval sets.
+//
+// The primary invariant that interval sets guard is canonical ordering. That
+// is, every interval set contains an ordered sequence of intervals where
+// no two intervals are overlapping or adjacent. While this invariant is
+// occasionally broken within the implementation, it should be impossible for
+// callers to observe it.
+//
+// Since case folding (as implemented below) breaks that invariant, we roll
+// that into this API even though it is a little out of place in an otherwise
+// generic interval set. (Hence the reason why the `unicode` module is imported
+// here.)
+//
+// Some of the implementation complexity here is a result of me wanting to
+// preserve the sequential representation without using additional memory.
+// In many cases, we do use linear extra memory, but it is at most 2x and it
+// is amortized. If we relaxed the memory requirements, this implementation
+// could become much simpler. The extra memory is honestly probably OK, but
+// character classes (especially of the Unicode variety) can become quite
+// large, and it would be nice to keep regex compilation snappy even in debug
+// builds. (In the past, I have been careless with this area of code and it has
+// caused slow regex compilations in debug mode, so this isn't entirely
+// unwarranted.)
+//
+// Tests on this are relegated to the public API of HIR in src/hir.rs.
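+//
+// As a concrete illustration of the canonical ordering invariant: pushing the
+// ranges `b-f`, `a-c` and `g-h` into a set leaves the single range `a-h`,
+// since overlapping and adjacent ranges are merged and the result is kept
+// sorted.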
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct IntervalSet<I> {
+    ranges: Vec<I>,
+}
+
+impl<I: Interval> IntervalSet<I> {
+    /// Create a new set from a sequence of intervals. Each interval is
+    /// specified as a pair of bounds, where both bounds are inclusive.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> {
+        let mut set = IntervalSet { ranges: intervals.into_iter().collect() };
+        set.canonicalize();
+        set
+    }
+
+    /// Add a new interval to this set.
+    pub fn push(&mut self, interval: I) {
+        // TODO: This could be faster. e.g., Push the interval such that
+        // it preserves canonicalization.
+        self.ranges.push(interval);
+        self.canonicalize();
+    }
+
+    /// Return an iterator over all intervals in this set.
+    ///
+    /// The iterator yields intervals in ascending order.
+    pub fn iter(&self) -> IntervalSetIter<I> {
+        IntervalSetIter(self.ranges.iter())
+    }
+
+    /// Return an immutable slice of intervals in this set.
+    ///
+    /// The sequence returned is in canonical ordering.
+    pub fn intervals(&self) -> &[I] {
+        &self.ranges
+    }
+
+    /// Expand this interval set such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// This returns an error if the necessary case mapping data is not
+    /// available.
+    pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> {
+        let len = self.ranges.len();
+        for i in 0..len {
+            let range = self.ranges[i];
+            if let Err(err) = range.case_fold_simple(&mut self.ranges) {
+                self.canonicalize();
+                return Err(err);
+            }
+        }
+        self.canonicalize();
+        Ok(())
+    }
+
+    /// Union this set with the given set, in place.
+    pub fn union(&mut self, other: &IntervalSet<I>) {
+        // This could almost certainly be done more efficiently.
+        self.ranges.extend(&other.ranges);
+        self.canonicalize();
+    }
+
+    /// Intersect this set with the given set, in place.
+    pub fn intersect(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() {
+            return;
+        }
+        if other.ranges.is_empty() {
+            self.ranges.clear();
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the intersection to the end of this range, and then drain it before
+        // we're done.
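+        //
+        // For example, intersecting `[a-f, m-p]` with `[d-n]` appends `d-f`
+        // and `m-n` to the end of `self.ranges`; the drain below then removes
+        // the original ranges, leaving `[d-f, m-n]`.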
+        let drain_end = self.ranges.len();
+
+        let mut ita = (0..drain_end).into_iter();
+        let mut itb = (0..other.ranges.len()).into_iter();
+        let mut a = ita.next().unwrap();
+        let mut b = itb.next().unwrap();
+        loop {
+            if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
+                self.ranges.push(ab);
+            }
+            let (it, aorb) =
+                if self.ranges[a].upper() < other.ranges[b].upper() {
+                    (&mut ita, &mut a)
+                } else {
+                    (&mut itb, &mut b)
+                };
+            match it.next() {
+                Some(v) => *aorb = v,
+                None => break,
+            }
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Subtract the given set from this set, in place.
+    pub fn difference(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() || other.ranges.is_empty() {
+            return;
+        }
+
+        // This algorithm is (to me) surprisingly complex. A search of the
+        // interwebs indicates that this is a potentially interesting problem.
+        // Folks seem to suggest interval or segment trees, but I'd like to
+        // avoid the overhead (both runtime and conceptual) of that.
+        //
+        // The following is basically my Shitty First Draft. Therefore, in
+        // order to grok it, you probably need to read each line carefully.
+        // Simplifications are most welcome!
+        //
+        // Remember, we can assume the canonical format invariant here, which
+        // says that all ranges are sorted, not overlapping and not adjacent in
+        // each class.
+        let drain_end = self.ranges.len();
+        let (mut a, mut b) = (0, 0);
+        'LOOP: while a < drain_end && b < other.ranges.len() {
+            // Basically, the easy cases are when neither range overlaps with
+            // each other. If the `b` range is less than our current `a`
+            // range, then we can skip it and move on.
+            if other.ranges[b].upper() < self.ranges[a].lower() {
+                b += 1;
+                continue;
+            }
+            // ... similarly for the `a` range. If it's less than the smallest
+            // `b` range, then we can add it as-is.
+            if self.ranges[a].upper() < other.ranges[b].lower() {
+                let range = self.ranges[a];
+                self.ranges.push(range);
+                a += 1;
+                continue;
+            }
+            // Otherwise, we have overlapping ranges.
+            assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
+
+            // This part is tricky and was non-obvious to me without looking
+            // at explicit examples (see the tests). The trickiness stems from
+            // two things: 1) subtracting a range from another range could
+            // yield two ranges and 2) after subtracting a range, it's possible
+            // that future ranges can have an impact. The loop below advances
+            // the `b` ranges until they can't possibly impact the current
+            // range.
+            //
+            // For example, if our `a` range is `a-t` and our next four `b`
+            // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
+            // subtraction three times before moving on to the next `a` range.
+            let mut range = self.ranges[a];
+            while b < other.ranges.len()
+                && !range.is_intersection_empty(&other.ranges[b])
+            {
+                let old_range = range;
+                range = match range.difference(&other.ranges[b]) {
+                    (None, None) => {
+                        // We lost the entire range, so move on to the next
+                        // without adding this one.
+                        a += 1;
+                        continue 'LOOP;
+                    }
+                    (Some(range1), None) | (None, Some(range1)) => range1,
+                    (Some(range1), Some(range2)) => {
+                        self.ranges.push(range1);
+                        range2
+                    }
+                };
+                // It's possible that the `b` range has more to contribute
+                // here. In particular, if it is greater than the original
+                // range, then it might impact the next `a` range *and* it
+                // has impacted the current `a` range as much as possible,
+                // so we can quit. We don't bump `b` so that the next `a`
+                // range can apply it.
+                if other.ranges[b].upper() > old_range.upper() {
+                    break;
+                }
+                // Otherwise, the next `b` range might apply to the current
+                // `a` range.
+                b += 1;
+            }
+            self.ranges.push(range);
+            a += 1;
+        }
+        while a < drain_end {
+            let range = self.ranges[a];
+            self.ranges.push(range);
+            a += 1;
+        }
+        self.ranges.drain(..drain_end);
+    }
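+
+    // Illustrative sketch (not part of the upstream source), continuing the
+    // example from the comment above: with `self = {[a-t]}` and
+    // `other = {[a-c], [g-i], [r-t], [x-z]}`, the inner loop subtracts
+    // `a-c`, `g-i` and `r-t` in turn (pushing `d-f` along the way and
+    // carrying `j-q` out of the loop), while `x-z` is skipped because it
+    // does not intersect `a-t`. After the drain, the set is {[d-f], [j-q]}.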
+
+    /// Compute the symmetric difference of the two sets, in place.
+    ///
+    /// This computes the symmetric difference of two interval sets. This
+    /// removes all elements in this set that are also in the given set,
+    /// but also adds all elements from the given set that aren't in this
+    /// set. That is, the set will contain all elements in either set,
+    /// but will not contain any elements that are in both sets.
+    pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
+        // TODO(burntsushi): Fix this so that it amortizes allocation.
+        let mut intersection = self.clone();
+        intersection.intersect(other);
+        self.union(other);
+        self.difference(&intersection);
+    }
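+
+    // Illustrative sketch (not part of the upstream source):
+    // {[a-f]} symmetric-difference {[d-k]} = union {[a-k]} minus
+    // intersection {[d-f]} = {[a-c], [g-k]}.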
+
+    /// Negate this interval set.
+    ///
+    /// For all `x` where `x` is any element, if `x` was in this set, then it
+    /// will not be in this set after negation.
+    pub fn negate(&mut self) {
+        if self.ranges.is_empty() {
+            let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
+            self.ranges.push(I::create(min, max));
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of our existing ranges, and then drain
+        // the original ranges before we're done.
+        let drain_end = self.ranges.len();
+
+        // We do checked arithmetic below because of the canonical ordering
+        // invariant.
+        if self.ranges[0].lower() > I::Bound::min_value() {
+            let upper = self.ranges[0].lower().decrement();
+            self.ranges.push(I::create(I::Bound::min_value(), upper));
+        }
+        for i in 1..drain_end {
+            let lower = self.ranges[i - 1].upper().increment();
+            let upper = self.ranges[i].lower().decrement();
+            self.ranges.push(I::create(lower, upper));
+        }
+        if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
+            let lower = self.ranges[drain_end - 1].upper().increment();
+            self.ranges.push(I::create(lower, I::Bound::max_value()));
+        }
+        self.ranges.drain(..drain_end);
+    }
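+
+    // Illustrative sketch (not part of the upstream source): negating the
+    // `char` set {[b-y]} pushes ['\x00'-'a'] and ['z'-'\u{10FFFF}'] and then
+    // drains the original range, leaving the complement of the set.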
+
+    /// Converts this set into a canonical ordering.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty());
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalized ranges
+        // to the end of the existing ranges, and then drain the originals
+        // before we're done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range into the previously added range.
+            if self.ranges.len() > drain_end {
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            let range = self.ranges[oldi];
+            self.ranges.push(range);
+        }
+        self.ranges.drain(..drain_end);
+    }
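+
+    // Illustrative sketch (not part of the upstream source): canonicalizing
+    // {[m-p], [a-c], [d-f]} first sorts the ranges to [a-c], [d-f], [m-p];
+    // the merge pass then unions the adjacent [a-c] and [d-f] into [a-f],
+    // and the drain leaves the canonical set {[a-f], [m-p]}.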
+
+    /// Returns true if and only if this set is in a canonical ordering.
+    fn is_canonical(&self) -> bool {
+        for pair in self.ranges.windows(2) {
+            if pair[0] >= pair[1] {
+                return false;
+            }
+            if pair[0].is_contiguous(&pair[1]) {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+/// An iterator over intervals.
+#[derive(Debug)]
+pub struct IntervalSetIter<'a, I: 'a>(slice::Iter<'a, I>);
+
+impl<'a, I> Iterator for IntervalSetIter<'a, I> {
+    type Item = &'a I;
+
+    fn next(&mut self) -> Option<&'a I> {
+        self.0.next()
+    }
+}
+
+pub trait Interval:
+    Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
+{
+    type Bound: Bound;
+
+    fn lower(&self) -> Self::Bound;
+    fn upper(&self) -> Self::Bound;
+    fn set_lower(&mut self, bound: Self::Bound);
+    fn set_upper(&mut self, bound: Self::Bound);
+    fn case_fold_simple(
+        &self,
+        intervals: &mut Vec<Self>,
+    ) -> Result<(), unicode::CaseFoldError>;
+
+    /// Create a new interval.
+    fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
+        let mut int = Self::default();
+        if lower <= upper {
+            int.set_lower(lower);
+            int.set_upper(upper);
+        } else {
+            int.set_lower(upper);
+            int.set_upper(lower);
+        }
+        int
+    }
+
+    /// Union the given overlapping range into this range.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &Self) -> Option<Self> {
+        if !self.is_contiguous(other) {
+            return None;
+        }
+        let lower = cmp::min(self.lower(), other.lower());
+        let upper = cmp::max(self.upper(), other.upper());
+        Some(Self::create(lower, upper))
+    }
+
+    /// Intersect this range with the given range and return the result.
+    ///
+    /// If the intersection is empty, then this returns `None`.
+    fn intersect(&self, other: &Self) -> Option<Self> {
+        let lower = cmp::max(self.lower(), other.lower());
+        let upper = cmp::min(self.upper(), other.upper());
+        if lower <= upper {
+            Some(Self::create(lower, upper))
+        } else {
+            None
+        }
+    }
+
+    /// Subtract the given range from this range and return the resulting
+    /// ranges.
+    ///
+    /// If subtraction would result in an empty range, then no ranges are
+    /// returned.
+    fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
+        if self.is_subset(other) {
+            return (None, None);
+        }
+        if self.is_intersection_empty(other) {
+            return (Some(self.clone()), None);
+        }
+        let add_lower = other.lower() > self.lower();
+        let add_upper = other.upper() < self.upper();
+        // We know this because !self.is_subset(other) and the ranges have
+        // a non-empty intersection.
+        assert!(add_lower || add_upper);
+        let mut ret = (None, None);
+        if add_lower {
+            let upper = other.lower().decrement();
+            ret.0 = Some(Self::create(self.lower(), upper));
+        }
+        if add_upper {
+            let lower = other.upper().increment();
+            let range = Self::create(lower, self.upper());
+            if ret.0.is_none() {
+                ret.0 = Some(range);
+            } else {
+                ret.1 = Some(range);
+            }
+        }
+        ret
+    }
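+
+    // Illustrative sketch (not part of the upstream source), for `char`
+    // ranges: [a-z].difference([m-p]) == (Some([a-l]), Some([q-z])),
+    // [a-z].difference([a-m]) == (Some([n-z]), None) and
+    // [c-f].difference([a-z]) == (None, None).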
+
+    /// Compute the symmetric difference of this range and the given range.
+    /// This returns the union of the two ranges minus their intersection.
+    fn symmetric_difference(
+        &self,
+        other: &Self,
+    ) -> (Option<Self>, Option<Self>) {
+        let union = match self.union(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(union) => union,
+        };
+        let intersection = match self.intersect(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(intersection) => intersection,
+        };
+        union.difference(&intersection)
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &Self) -> bool {
+        let lower1 = self.lower().as_u32();
+        let upper1 = self.upper().as_u32();
+        let lower2 = other.lower().as_u32();
+        let upper2 = other.upper().as_u32();
+        cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
+    }
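+
+    // Illustrative sketch (not part of the upstream source): [a-c] and [d-f]
+    // are contiguous (adjacent), [a-d] and [c-f] are contiguous
+    // (overlapping), but [a-c] and [e-f] are not. The `saturating_add(1)`
+    // keeps the adjacency check from overflowing at the maximum bound.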
+
+    /// Returns true if and only if the intersection of this range and the
+    /// other range is empty.
+    fn is_intersection_empty(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
+    }
+
+    /// Returns true if and only if this range is a subset of the other range.
+    fn is_subset(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        (lower2 <= lower1 && lower1 <= upper2)
+            && (lower2 <= upper1 && upper1 <= upper2)
+    }
+}
+
+pub trait Bound:
+    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
+{
+    fn min_value() -> Self;
+    fn max_value() -> Self;
+    fn as_u32(self) -> u32;
+    fn increment(self) -> Self;
+    fn decrement(self) -> Self;
+}
+
+impl Bound for u8 {
+    fn min_value() -> Self {
+        u8::MIN
+    }
+    fn max_value() -> Self {
+        u8::MAX
+    }
+    fn as_u32(self) -> u32 {
+        self as u32
+    }
+    fn increment(self) -> Self {
+        self.checked_add(1).unwrap()
+    }
+    fn decrement(self) -> Self {
+        self.checked_sub(1).unwrap()
+    }
+}
+
+impl Bound for char {
+    fn min_value() -> Self {
+        '\x00'
+    }
+    fn max_value() -> Self {
+        '\u{10FFFF}'
+    }
+    fn as_u32(self) -> u32 {
+        self as u32
+    }
+
+    fn increment(self) -> Self {
+        match self {
+            '\u{D7FF}' => '\u{E000}',
+            c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(),
+        }
+    }
+
+    fn decrement(self) -> Self {
+        match self {
+            '\u{E000}' => '\u{D7FF}',
+            c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(),
+        }
+    }
+}
+
+// Tests for interval sets are written in src/hir.rs against the public API.
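+
+// A small illustrative test (not part of the upstream source): the `char`
+// implementation of `Bound` must skip the surrogate gap, since surrogate
+// code points are not valid `char` values.
+#[cfg(test)]
+#[test]
+fn bound_char_surrogate_gap_sketch() {
+    assert_eq!('\u{D7FF}'.increment(), '\u{E000}');
+    assert_eq!('\u{E000}'.decrement(), '\u{D7FF}');
+    assert_eq!('a'.increment(), 'b');
+}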
diff --git a/src/hir/literal/mod.rs b/src/hir/literal/mod.rs
new file mode 100644
index 0000000..3ba225c
--- /dev/null
+++ b/src/hir/literal/mod.rs
@@ -0,0 +1,1685 @@
+/*!
+Provides routines for extracting literal prefixes and suffixes from an `Hir`.
+*/
+
+use std::cmp;
+use std::fmt;
+use std::iter;
+use std::mem;
+use std::ops;
+
+use hir::{self, Hir, HirKind};
+
+/// A set of literal byte strings extracted from a regular expression.
+///
+/// Every member of the set is a `Literal`, which is represented by a
+/// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
+/// said to be either *complete* or *cut*. A complete literal means that
+/// it extends until the beginning (or end) of the regular expression. In
+/// some circumstances, this can be used to indicate a match in the regular
+/// expression.
+///
+/// A key aspect of literal extraction is knowing when to stop. It is not
+/// feasible to blindly extract all literals from a regular expression, even if
+/// there are finitely many. For example, the regular expression `[0-9]{10}`
+/// has `10^10` distinct literals. For this reason, literal extraction is
+/// bounded to some low number by default using heuristics, but the limits can
+/// be tweaked.
+///
+/// **WARNING**: Literal extraction uses stack space proportional to the size
+/// of the `Hir` expression. At some point, this drawback will be eliminated.
+/// To protect yourself, set a reasonable
+/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
+/// This is done for you by default.
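+///
+/// A minimal usage sketch (not part of the upstream docs), extracting prefix
+/// literals from a small alternation via the public parser:
+///
+/// ```
+/// use regex_syntax::ParserBuilder;
+/// use regex_syntax::hir::literal::Literals;
+///
+/// let hir = ParserBuilder::new().build().parse("foo|bar").unwrap();
+/// let lits = Literals::prefixes(&hir);
+/// assert!(lits.all_complete());
+/// assert_eq!(2, lits.literals().len());
+/// ```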
+#[derive(Clone, Eq, PartialEq)]
+pub struct Literals {
+    lits: Vec<Literal>,
+    limit_size: usize,
+    limit_class: usize,
+}
+
+/// A single member of a set of literals extracted from a regular expression.
+///
+/// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
+/// and `Vec` operations are available.
+#[derive(Clone, Eq, Ord)]
+pub struct Literal {
+    v: Vec<u8>,
+    cut: bool,
+}
+
+impl Literals {
+    /// Returns a new empty set of literals using default limits.
+    pub fn empty() -> Literals {
+        Literals { lits: vec![], limit_size: 250, limit_class: 10 }
+    }
+
+    /// Returns a set of literal prefixes extracted from the given `Hir`.
+    pub fn prefixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_prefixes(expr);
+        lits
+    }
+
+    /// Returns a set of literal suffixes extracted from the given `Hir`.
+    pub fn suffixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_suffixes(expr);
+        lits
+    }
+
+    /// Get the approximate size limit (in bytes) of this set.
+    pub fn limit_size(&self) -> usize {
+        self.limit_size
+    }
+
+    /// Set the approximate size limit (in bytes) of this set.
+    ///
+    /// If extracting a literal would put the set over this limit, then
+    /// extraction stops.
+    ///
+    /// The new limits will only apply to additions to this set. Existing
+    /// members remain unchanged, even if the set exceeds the new limit.
+    pub fn set_limit_size(&mut self, size: usize) -> &mut Literals {
+        self.limit_size = size;
+        self
+    }
+
+    /// Get the character class size limit for this set.
+    pub fn limit_class(&self) -> usize {
+        self.limit_class
+    }
+
+    /// Limits the size of character (or byte) classes considered.
+    ///
+    /// A value of `0` prevents all character classes from being considered.
+    ///
+    /// This limit also applies to case insensitive literals, since each
+    /// character in the case insensitive literal is converted to a class, and
+    /// then case folded.
+    ///
+    /// The new limits will only apply to additions to this set. Existing
+    /// members remain unchanged, even if the set exceeds the new limit.
+    pub fn set_limit_class(&mut self, size: usize) -> &mut Literals {
+        self.limit_class = size;
+        self
+    }
+
+    /// Returns the set of literals as a slice. Its order is unspecified.
+    pub fn literals(&self) -> &[Literal] {
+        &self.lits
+    }
+
+    /// Returns the length of the smallest literal.
+    ///
+    /// Returns None if there are no literals in the set.
+    pub fn min_len(&self) -> Option<usize> {
+        let mut min = None;
+        for lit in &self.lits {
+            match min {
+                None => min = Some(lit.len()),
+                Some(m) if lit.len() < m => min = Some(lit.len()),
+                _ => {}
+            }
+        }
+        min
+    }
+
+    /// Returns true if all members in this set are complete.
+    pub fn all_complete(&self) -> bool {
+        !self.lits.is_empty() && self.lits.iter().all(|l| !l.is_cut())
+    }
+
+    /// Returns true if any member in this set is complete.
+    pub fn any_complete(&self) -> bool {
+        self.lits.iter().any(|lit| !lit.is_cut())
+    }
+
+    /// Returns true if this set contains an empty literal.
+    pub fn contains_empty(&self) -> bool {
+        self.lits.iter().any(|lit| lit.is_empty())
+    }
+
+    /// Returns true if this set is empty or if all of its members are empty.
+    pub fn is_empty(&self) -> bool {
+        self.lits.is_empty() || self.lits.iter().all(|lit| lit.is_empty())
+    }
+
+    /// Returns a new empty set of literals using this set's limits.
+    pub fn to_empty(&self) -> Literals {
+        let mut lits = Literals::empty();
+        lits.set_limit_size(self.limit_size).set_limit_class(self.limit_class);
+        lits
+    }
+
+    /// Returns the longest common prefix of all members in this set.
+    pub fn longest_common_prefix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let lit0 = &*self.lits[0];
+        let mut len = lit0.len();
+        for lit in &self.lits[1..] {
+            len = cmp::min(
+                len,
+                lit.iter().zip(lit0).take_while(|&(a, b)| a == b).count(),
+            );
+        }
+        &self.lits[0][..len]
+    }
+
+    /// Returns the longest common suffix of all members in this set.
+    pub fn longest_common_suffix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let lit0 = &*self.lits[0];
+        let mut len = lit0.len();
+        for lit in &self.lits[1..] {
+            len = cmp::min(
+                len,
+                lit.iter()
+                    .rev()
+                    .zip(lit0.iter().rev())
+                    .take_while(|&(a, b)| a == b)
+                    .count(),
+            );
+        }
+        &self.lits[0][self.lits[0].len() - len..]
+    }
+
+    /// Returns a new set of literals with the given number of bytes trimmed
+    /// from the suffix of each literal.
+    ///
+    /// If any literal would be cut out completely by trimming, then None is
+    /// returned.
+    ///
+    /// Any duplicates that are created as a result of this transformation are
+    /// removed.
+    pub fn trim_suffix(&self, num_bytes: usize) -> Option<Literals> {
+        if self.min_len().map(|len| len <= num_bytes).unwrap_or(true) {
+            return None;
+        }
+        let mut new = self.to_empty();
+        for mut lit in self.lits.iter().cloned() {
+            let new_len = lit.len() - num_bytes;
+            lit.truncate(new_len);
+            lit.cut();
+            new.lits.push(lit);
+        }
+        new.lits.sort();
+        new.lits.dedup();
+        Some(new)
+    }
+
+    /// Returns a new set of prefixes of this set of literals that are
+    /// guaranteed to be unambiguous.
+    ///
+    /// Any substring match of a member of the returned set is guaranteed to
+    /// never overlap with a substring match of another member of the set at
+    /// the same starting position.
+    ///
+    /// Given any two members of the returned set, neither is a substring of
+    /// the other.
+    pub fn unambiguous_prefixes(&self) -> Literals {
+        if self.lits.is_empty() {
+            return self.to_empty();
+        }
+        let mut old: Vec<Literal> = self.lits.iter().cloned().collect();
+        let mut new = self.to_empty();
+        'OUTER: while let Some(mut candidate) = old.pop() {
+            if candidate.is_empty() {
+                continue;
+            }
+            if new.lits.is_empty() {
+                new.lits.push(candidate);
+                continue;
+            }
+            for lit2 in &mut new.lits {
+                if lit2.is_empty() {
+                    continue;
+                }
+                if &candidate == lit2 {
+                    // If the literal is already in the set, then we can
+                    // just drop it. But make sure that cut literals are
+                    // infectious!
+                    candidate.cut = candidate.cut || lit2.cut;
+                    lit2.cut = candidate.cut;
+                    continue 'OUTER;
+                }
+                if candidate.len() < lit2.len() {
+                    if let Some(i) = position(&candidate, &lit2) {
+                        candidate.cut();
+                        let mut lit3 = lit2.clone();
+                        lit3.truncate(i);
+                        lit3.cut();
+                        old.push(lit3);
+                        lit2.clear();
+                    }
+                } else {
+                    if let Some(i) = position(&lit2, &candidate) {
+                        lit2.cut();
+                        let mut new_candidate = candidate.clone();
+                        new_candidate.truncate(i);
+                        new_candidate.cut();
+                        old.push(new_candidate);
+                        candidate.clear();
+                    }
+                }
+                // Oops, the candidate is already represented in the set.
+                if candidate.is_empty() {
+                    continue 'OUTER;
+                }
+            }
+            new.lits.push(candidate);
+        }
+        new.lits.retain(|lit| !lit.is_empty());
+        new.lits.sort();
+        new.lits.dedup();
+        new
+    }
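+
+    // Illustrative sketch (not part of the upstream source): for the set
+    // {Complete("ab"), Complete("abc")}, "ab" is a prefix of "abc", so the
+    // longer literal is truncated at the point of ambiguity and the result
+    // is the single cut literal {Cut("ab")}; no member of the result is a
+    // substring of another.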
+
+    /// Returns a new set of suffixes of this set of literals that are
+    /// guaranteed to be unambiguous.
+    ///
+    /// Any substring match of a member of the returned set is guaranteed to
+    /// never overlap with a substring match of another member of the set at
+    /// the same ending position.
+    ///
+    /// Given any two members of the returned set, neither is a substring of
+    /// the other.
+    pub fn unambiguous_suffixes(&self) -> Literals {
+        // This is a touch wasteful...
+        let mut lits = self.clone();
+        lits.reverse();
+        let mut unamb = lits.unambiguous_prefixes();
+        unamb.reverse();
+        unamb
+    }
+
+    /// Unions the prefixes from the given expression to this set.
+    ///
+    /// If prefixes could not be added (for example, this set would exceed its
+    /// size limits or the set of prefixes from `expr` includes the empty
+    /// string), then false is returned.
+    ///
+    /// Note that prefix literals extracted from `expr` are said to be complete
+    /// if and only if the literal extends from the beginning of `expr` to the
+    /// end of `expr`.
+    pub fn union_prefixes(&mut self, expr: &Hir) -> bool {
+        let mut lits = self.to_empty();
+        prefixes(expr, &mut lits);
+        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
+    }
+
+    /// Unions the suffixes from the given expression to this set.
+    ///
+    /// If suffixes could not be added (for example, this set would exceed its
+    /// size limits or the set of suffixes from `expr` includes the empty
+    /// string), then false is returned.
+    ///
+    /// Note that suffix literals extracted from `expr` are said to be complete
+    /// if and only if the literal extends from the end of `expr` to the
+    /// beginning of `expr`.
+    pub fn union_suffixes(&mut self, expr: &Hir) -> bool {
+        let mut lits = self.to_empty();
+        suffixes(expr, &mut lits);
+        lits.reverse();
+        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
+    }
+
+    /// Unions this set with another set.
+    ///
+    /// If the union would cause the set to exceed its limits, then the union
+    /// is skipped and it returns false. Otherwise, if the union succeeds, it
+    /// returns true.
+    pub fn union(&mut self, lits: Literals) -> bool {
+        if self.num_bytes() + lits.num_bytes() > self.limit_size {
+            return false;
+        }
+        if lits.is_empty() {
+            self.lits.push(Literal::empty());
+        } else {
+            self.lits.extend(lits.lits);
+        }
+        true
+    }
+
+    /// Extends this set with another set.
+    ///
+    /// The set of literals is extended via a cross product.
+    ///
+    /// If a cross product would cause this set to exceed its limits, then the
+    /// cross product is skipped and it returns false. Otherwise, if the cross
+    /// product succeeds, it returns true.
+    pub fn cross_product(&mut self, lits: &Literals) -> bool {
+        if lits.is_empty() {
+            return true;
+        }
+        // Check to make sure we stay within our limits.
+        let mut size_after;
+        if self.is_empty() || !self.any_complete() {
+            size_after = self.num_bytes();
+            for lits_lit in lits.literals() {
+                size_after += lits_lit.len();
+            }
+        } else {
+            size_after = self.lits.iter().fold(0, |accum, lit| {
+                accum + if lit.is_cut() { lit.len() } else { 0 }
+            });
+            for lits_lit in lits.literals() {
+                for self_lit in self.literals() {
+                    if !self_lit.is_cut() {
+                        size_after += self_lit.len() + lits_lit.len();
+                    }
+                }
+            }
+        }
+        if size_after > self.limit_size {
+            return false;
+        }
+
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for lits_lit in lits.literals() {
+            for mut self_lit in base.clone() {
+                self_lit.extend(&**lits_lit);
+                self_lit.cut = lits_lit.cut;
+                self.lits.push(self_lit);
+            }
+        }
+        true
+    }
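+
+    // Illustrative sketch (not part of the upstream source): crossing
+    // {Complete("a"), Complete("b")} with {Complete("x"), Cut("y")} replaces
+    // the complete members with the products {Complete("ax"), Complete("bx"),
+    // Cut("ay"), Cut("by")}; cut members of `self`, if any, are left
+    // untouched.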
+
+    /// Extends each literal in this set with the bytes given.
+    ///
+    /// If the set is empty, then the given literal is added to the set.
+    ///
+    /// If adding any number of bytes to all members of this set causes a limit
+    /// to be exceeded, then no bytes are added and false is returned. If a
+    /// prefix of `bytes` can be fit into this set, then it is used and all
+    /// resulting literals are cut.
+    pub fn cross_add(&mut self, bytes: &[u8]) -> bool {
+        // N.B. This could be implemented by simply calling cross_product with
+        // a literal set containing just `bytes`, but we can be smarter about
+        // taking shorter prefixes of `bytes` if they'll fit.
+        if bytes.is_empty() {
+            return true;
+        }
+        if self.lits.is_empty() {
+            let i = cmp::min(self.limit_size, bytes.len());
+            self.lits.push(Literal::new(bytes[..i].to_owned()));
+            self.lits[0].cut = i < bytes.len();
+            return !self.lits[0].is_cut();
+        }
+        let size = self.num_bytes();
+        if size + self.lits.len() >= self.limit_size {
+            return false;
+        }
+        let mut i = 1;
+        while size + (i * self.lits.len()) <= self.limit_size
+            && i < bytes.len()
+        {
+            i += 1;
+        }
+        for lit in &mut self.lits {
+            if !lit.is_cut() {
+                lit.extend(&bytes[..i]);
+                if i < bytes.len() {
+                    lit.cut();
+                }
+            }
+        }
+        true
+    }
+
+    /// Adds the given literal to this set.
+    ///
+    /// Returns false if adding this literal would cause the set to be too
+    /// big.
+    pub fn add(&mut self, lit: Literal) -> bool {
+        if self.num_bytes() + lit.len() > self.limit_size {
+            return false;
+        }
+        self.lits.push(lit);
+        true
+    }
+
+    /// Extends each literal in this set with the character class given.
+    ///
+    /// Returns false if the character class was too big to add.
+    pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool {
+        self._add_char_class(cls, false)
+    }
+
+    /// Extends each literal in this set with the character class given,
+    /// writing the bytes of each character in reverse.
+    ///
+    /// Returns false if the character class was too big to add.
+    fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool {
+        self._add_char_class(cls, true)
+    }
+
+    fn _add_char_class(
+        &mut self,
+        cls: &hir::ClassUnicode,
+        reverse: bool,
+    ) -> bool {
+        use std::char;
+
+        if self.class_exceeds_limits(cls_char_count(cls)) {
+            return false;
+        }
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for r in cls.iter() {
+            let (s, e) = (r.start as u32, r.end as u32 + 1);
+            for c in (s..e).filter_map(char::from_u32) {
+                for mut lit in base.clone() {
+                    let mut bytes = c.to_string().into_bytes();
+                    if reverse {
+                        bytes.reverse();
+                    }
+                    lit.extend(&bytes);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Extends each literal in this set with the byte class given.
+    ///
+    /// Returns false if the byte class was too big to add.
+    pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool {
+        if self.class_exceeds_limits(cls_byte_count(cls)) {
+            return false;
+        }
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for r in cls.iter() {
+            let (s, e) = (r.start as u32, r.end as u32 + 1);
+            for b in (s..e).map(|b| b as u8) {
+                for mut lit in base.clone() {
+                    lit.push(b);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Cuts every member of this set. When a member is cut, it can never
+    /// be extended.
+    pub fn cut(&mut self) {
+        for lit in &mut self.lits {
+            lit.cut();
+        }
+    }
+
+    /// Reverses all members in place.
+    pub fn reverse(&mut self) {
+        for lit in &mut self.lits {
+            lit.reverse();
+        }
+    }
+
+    /// Clears this set of all members.
+    pub fn clear(&mut self) {
+        self.lits.clear();
+    }
+
+    /// Pops all complete literals out of this set.
+    fn remove_complete(&mut self) -> Vec<Literal> {
+        let mut base = vec![];
+        for lit in mem::replace(&mut self.lits, vec![]) {
+            if lit.is_cut() {
+                self.lits.push(lit);
+            } else {
+                base.push(lit);
+            }
+        }
+        base
+    }
+
+    /// Returns the total number of bytes in this set.
+    fn num_bytes(&self) -> usize {
+        self.lits.iter().fold(0, |accum, lit| accum + lit.len())
+    }
+
+    /// Returns true if a character class with the given size would cause this
+    /// set to exceed its limits.
+    ///
+    /// The size given should correspond to the number of items in the class.
+    fn class_exceeds_limits(&self, size: usize) -> bool {
+        if size > self.limit_class {
+            return true;
+        }
+        // This is an approximation since codepoints in a char class can encode
+        // to 1-4 bytes.
+        let new_byte_count = if self.lits.is_empty() {
+            size
+        } else {
+            self.lits.iter().fold(0, |accum, lit| {
+                accum
+                    + if lit.is_cut() {
+                        // If the literal is cut, then we'll never add
+                        // anything to it, so don't count it.
+                        0
+                    } else {
+                        (lit.len() + 1) * size
+                    }
+            })
+        };
+        new_byte_count > self.limit_size
+    }
+}
+
+fn prefixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0; 4];
+            lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
+        }
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
+        }
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
+            if !lits.add_char_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
+            if !lits.add_byte_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            prefixes(&**hir, lits);
+        }
+        HirKind::Repetition(ref x) => match x.kind {
+            hir::RepetitionKind::ZeroOrOne => {
+                repeat_zero_or_one_literals(&x.hir, lits, prefixes);
+            }
+            hir::RepetitionKind::ZeroOrMore => {
+                repeat_zero_or_more_literals(&x.hir, lits, prefixes);
+            }
+            hir::RepetitionKind::OneOrMore => {
+                repeat_one_or_more_literals(&x.hir, lits, prefixes);
+            }
+            hir::RepetitionKind::Range(ref rng) => {
+                let (min, max) = match *rng {
+                    hir::RepetitionRange::Exactly(m) => (m, Some(m)),
+                    hir::RepetitionRange::AtLeast(m) => (m, None),
+                    hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
+                };
+                repeat_range_literals(
+                    &x.hir, min, max, x.greedy, lits, prefixes,
+                )
+            }
+        },
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
+            for e in es {
+                if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() {
+                    if !lits.is_empty() {
+                        lits.cut();
+                        break;
+                    }
+                    lits.add(Literal::empty());
+                    continue;
+                }
+                let mut lits2 = lits.to_empty();
+                prefixes(e, &mut lits2);
+                if !lits.cross_product(&lits2) || !lits2.any_complete() {
+                    // If this expression couldn't yield any literal that
+                    // could be extended, then we need to quit. Since we're
+                    // short-circuiting, we also need to freeze every member.
+                    lits.cut();
+                    break;
+                }
+            }
+        }
+        HirKind::Alternation(ref es) => {
+            alternate_literals(es, lits, prefixes);
+        }
+        _ => lits.cut(),
+    }
+}
+
+fn suffixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0u8; 4];
+            let i = c.encode_utf8(&mut buf).len();
+            let buf = &mut buf[..i];
+            buf.reverse();
+            lits.cross_add(buf);
+        }
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
+        }
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
+            if !lits.add_char_class_reverse(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
+            if !lits.add_byte_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            suffixes(&**hir, lits);
+        }
+        HirKind::Repetition(ref x) => match x.kind {
+            hir::RepetitionKind::ZeroOrOne => {
+                repeat_zero_or_one_literals(&x.hir, lits, suffixes);
+            }
+            hir::RepetitionKind::ZeroOrMore => {
+                repeat_zero_or_more_literals(&x.hir, lits, suffixes);
+            }
+            hir::RepetitionKind::OneOrMore => {
+                repeat_one_or_more_literals(&x.hir, lits, suffixes);
+            }
+            hir::RepetitionKind::Range(ref rng) => {
+                let (min, max) = match *rng {
+                    hir::RepetitionRange::Exactly(m) => (m, Some(m)),
+                    hir::RepetitionRange::AtLeast(m) => (m, None),
+                    hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
+                };
+                repeat_range_literals(
+                    &x.hir, min, max, x.greedy, lits, suffixes,
+                )
+            }
+        },
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
+            for e in es.iter().rev() {
+                if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() {
+                    if !lits.is_empty() {
+                        lits.cut();
+                        break;
+                    }
+                    lits.add(Literal::empty());
+                    continue;
+                }
+                let mut lits2 = lits.to_empty();
+                suffixes(e, &mut lits2);
+                if !lits.cross_product(&lits2) || !lits2.any_complete() {
+                    // If this expression couldn't yield any literal that
+                    // could be extended, then we need to quit. Since we're
+                    // short-circuiting, we also need to freeze every member.
+                    lits.cut();
+                    break;
+                }
+            }
+        }
+        HirKind::Alternation(ref es) => {
+            alternate_literals(es, lits, suffixes);
+        }
+        _ => lits.cut(),
+    }
+}
+
+fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty());
+    lits3.set_limit_size(lits.limit_size() / 2);
+    f(e, &mut lits3);
+
+    if lits3.is_empty() || !lits2.cross_product(&lits3) {
+        lits.cut();
+        return;
+    }
+    lits2.add(Literal::empty());
+    if !lits.union(lits2) {
+        lits.cut();
+    }
+}
+
+fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty());
+    lits3.set_limit_size(lits.limit_size() / 2);
+    f(e, &mut lits3);
+
+    if lits3.is_empty() || !lits2.cross_product(&lits3) {
+        lits.cut();
+        return;
+    }
+    lits2.cut();
+    lits2.add(Literal::empty());
+    if !lits.union(lits2) {
+        lits.cut();
+    }
+}
+
+fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    f(e, lits);
+    lits.cut();
+}
+
+fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    min: u32,
+    max: Option<u32>,
+    greedy: bool,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    if min == 0 {
+        // This is a bit conservative. If `max` is set, then we could
+        // treat this as a finite set of alternations. For now, we
+        // just treat it as `e*`.
+        f(
+            &Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
+                greedy: greedy,
+                hir: Box::new(e.clone()),
+            }),
+            lits,
+        );
+    } else {
+        if min > 0 {
+            let n = cmp::min(lits.limit_size, min as usize);
+            let es = iter::repeat(e.clone()).take(n).collect();
+            f(&Hir::concat(es), lits);
+            if n < min as usize || lits.contains_empty() {
+                lits.cut();
+            }
+        }
+        if max.map_or(true, |max| min < max) {
+            lits.cut();
+        }
+    }
+}
+
+fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
+    es: &[Hir],
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let mut lits2 = lits.to_empty();
+    for e in es {
+        let mut lits3 = lits.to_empty();
+        lits3.set_limit_size(lits.limit_size() / 5);
+        f(e, &mut lits3);
+        if lits3.is_empty() || !lits2.union(lits3) {
+            // If we couldn't find suffixes for *any* of the
+            // alternates, then the entire alternation has to be thrown
+            // away and any existing members must be frozen. Similarly,
+            // if the union couldn't complete, stop and freeze.
+            lits.cut();
+            return;
+        }
+    }
+    if !lits.cross_product(&lits2) {
+        lits.cut();
+    }
+}
+
+impl fmt::Debug for Literals {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Literals")
+            .field("lits", &self.lits)
+            .field("limit_size", &self.limit_size)
+            .field("limit_class", &self.limit_class)
+            .finish()
+    }
+}
+
+impl Literal {
+    /// Returns a new complete literal with the bytes given.
+    pub fn new(bytes: Vec<u8>) -> Literal {
+        Literal { v: bytes, cut: false }
+    }
+
+    /// Returns a new complete empty literal.
+    pub fn empty() -> Literal {
+        Literal { v: vec![], cut: false }
+    }
+
+    /// Returns true if this literal was "cut."
+    pub fn is_cut(&self) -> bool {
+        self.cut
+    }
+
+    /// Cuts this literal.
+    pub fn cut(&mut self) {
+        self.cut = true;
+    }
+}
+
+impl PartialEq for Literal {
+    fn eq(&self, other: &Literal) -> bool {
+        self.v == other.v
+    }
+}
+
+impl PartialOrd for Literal {
+    fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> {
+        self.v.partial_cmp(&other.v)
+    }
+}
+
+impl fmt::Debug for Literal {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_cut() {
+            write!(f, "Cut({})", escape_unicode(&self.v))
+        } else {
+            write!(f, "Complete({})", escape_unicode(&self.v))
+        }
+    }
+}
+
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] {
+        &self.v
+    }
+}
+
+impl ops::Deref for Literal {
+    type Target = Vec<u8>;
+    fn deref(&self) -> &Vec<u8> {
+        &self.v
+    }
+}
+
+impl ops::DerefMut for Literal {
+    fn deref_mut(&mut self) -> &mut Vec<u8> {
+        &mut self.v
+    }
+}
+
+fn position(needle: &[u8], mut haystack: &[u8]) -> Option<usize> {
+    let mut i = 0;
+    while haystack.len() >= needle.len() {
+        if needle == &haystack[..needle.len()] {
+            return Some(i);
+        }
+        i += 1;
+        haystack = &haystack[1..];
+    }
+    None
+}
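+
+// A small illustrative test (not part of the upstream source) for the naive
+// substring search above.
+#[cfg(test)]
+#[test]
+fn position_sketch() {
+    assert_eq!(position(b"bc", b"abcd"), Some(1));
+    assert_eq!(position(b"zz", b"abcd"), None);
+    assert_eq!(position(b"", b"abcd"), Some(0));
+}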
+
+fn escape_unicode(bytes: &[u8]) -> String {
+    let show = match ::std::str::from_utf8(bytes) {
+        Ok(v) => v.to_string(),
+        Err(_) => escape_bytes(bytes),
+    };
+    let mut space_escaped = String::new();
+    for c in show.chars() {
+        if c.is_whitespace() {
+            let escaped = if c as u32 <= 0x7F {
+                escape_byte(c as u8)
+            } else {
+                if c as u32 <= 0xFFFF {
+                    format!(r"\u{{{:04x}}}", c as u32)
+                } else {
+                    format!(r"\U{{{:08x}}}", c as u32)
+                }
+            };
+            space_escaped.push_str(&escaped);
+        } else {
+            space_escaped.push(c);
+        }
+    }
+    space_escaped
+}
+
+fn escape_bytes(bytes: &[u8]) -> String {
+    let mut s = String::new();
+    for &b in bytes {
+        s.push_str(&escape_byte(b));
+    }
+    s
+}
+
+fn escape_byte(byte: u8) -> String {
+    use std::ascii::escape_default;
+
+    let escaped: Vec<u8> = escape_default(byte).collect();
+    String::from_utf8_lossy(&escaped).into_owned()
+}
+
+fn cls_char_count(cls: &hir::ClassUnicode) -> usize {
+    cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>()
+        as usize
+}
+
+fn cls_byte_count(cls: &hir::ClassBytes) -> usize {
+    cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>()
+        as usize
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fmt;
+
+    use super::{escape_bytes, Literal, Literals};
+    use hir::Hir;
+    use ParserBuilder;
+
+    // To make test failures easier to read.
+    #[derive(Debug, Eq, PartialEq)]
+    struct Bytes(Vec<ULiteral>);
+    #[derive(Debug, Eq, PartialEq)]
+    struct Unicode(Vec<ULiteral>);
+
+    fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> {
+        let mut ulits = vec![];
+        for blit in blits {
+            ulits
+                .push(ULiteral { v: escape_bytes(&blit), cut: blit.is_cut() });
+        }
+        ulits
+    }
+
+    fn create_lits<I: IntoIterator<Item = Literal>>(it: I) -> Literals {
+        Literals {
+            lits: it.into_iter().collect(),
+            limit_size: 0,
+            limit_class: 0,
+        }
+    }
+
+    // Needs to be pub for 1.3?
+    #[derive(Clone, Eq, PartialEq)]
+    pub struct ULiteral {
+        v: String,
+        cut: bool,
+    }
+
+    impl ULiteral {
+        fn is_cut(&self) -> bool {
+            self.cut
+        }
+    }
+
+    impl fmt::Debug for ULiteral {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            if self.is_cut() {
+                write!(f, "Cut({})", self.v)
+            } else {
+                write!(f, "Complete({})", self.v)
+            }
+        }
+    }
+
+    impl PartialEq<Literal> for ULiteral {
+        fn eq(&self, other: &Literal) -> bool {
+            self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut()
+        }
+    }
+
+    impl PartialEq<ULiteral> for Literal {
+        fn eq(&self, other: &ULiteral) -> bool {
+            &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut()
+        }
+    }
+
+    #[allow(non_snake_case)]
+    fn C(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: true }
+    }
+    #[allow(non_snake_case)]
+    fn M(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: false }
+    }
+
+    fn prefixes(lits: &mut Literals, expr: &Hir) {
+        lits.union_prefixes(expr);
+    }
+
+    fn suffixes(lits: &mut Literals, expr: &Hir) {
+        lits.union_suffixes(expr);
+    }
+
+    macro_rules! assert_lit_eq {
+        ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{
+            let expected: Vec<ULiteral> = vec![$($expected_lit),*];
+            let lits = $got_lits;
+            assert_eq!(
+                $which(expected.clone()),
+                $which(escape_lits(lits.literals())));
+            assert_eq!(
+                !expected.is_empty() && expected.iter().all(|l| !l.is_cut()),
+                lits.all_complete());
+            assert_eq!(
+                expected.iter().any(|l| !l.is_cut()),
+                lits.any_complete());
+        }};
+    }
+
+    macro_rules! test_lit {
+        ($name:ident, $which:ident, $re:expr) => {
+            test_lit!($name, $which, $re,);
+        };
+        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
+            #[test]
+            fn $name() {
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
+                assert_lit_eq!(Unicode, lits, $($lit),*);
+
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
+                assert_lit_eq!(Bytes, lits, $($lit),*);
+            }
+        };
+    }
+
+    // ************************************************************************
+    // Tests for prefix literal extraction.
+    // ************************************************************************
+
+    // Elementary tests.
+    test_lit!(pfx_one_lit1, prefixes, "a", M("a"));
+    test_lit!(pfx_one_lit2, prefixes, "abc", M("abc"));
+    test_lit!(pfx_one_lit3, prefixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+    #[cfg(feature = "unicode-case")]
+    test_lit!(pfx_one_lit4, prefixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(pfx_class1, prefixes, "[1-4]", M("1"), M("2"), M("3"), M("4"));
+    test_lit!(
+        pfx_class2,
+        prefixes,
+        "(?u)[☃Ⅰ]",
+        M("\\xe2\\x85\\xa0"),
+        M("\\xe2\\x98\\x83")
+    );
+    #[cfg(feature = "unicode-case")]
+    test_lit!(
+        pfx_class3,
+        prefixes,
+        "(?ui)[☃Ⅰ]",
+        M("\\xe2\\x85\\xa0"),
+        M("\\xe2\\x85\\xb0"),
+        M("\\xe2\\x98\\x83")
+    );
+    test_lit!(pfx_one_lit_casei1, prefixes, "(?i-u)a", M("A"), M("a"));
+    test_lit!(
+        pfx_one_lit_casei2,
+        prefixes,
+        "(?i-u)abc",
+        M("ABC"),
+        M("aBC"),
+        M("AbC"),
+        M("abC"),
+        M("ABc"),
+        M("aBc"),
+        M("Abc"),
+        M("abc")
+    );
+    test_lit!(pfx_group1, prefixes, "(a)", M("a"));
+    test_lit!(pfx_rep_zero_or_one1, prefixes, "a?");
+    test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?");
+    test_lit!(pfx_rep_zero_or_more1, prefixes, "a*");
+    test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*");
+    test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a"));
+    test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+", C("abc"));
+    test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+", C("a"));
+    test_lit!(pfx_rep_range1, prefixes, "a{0}");
+    test_lit!(pfx_rep_range2, prefixes, "a{0,}");
+    test_lit!(pfx_rep_range3, prefixes, "a{0,1}");
+    test_lit!(pfx_rep_range4, prefixes, "a{1}", M("a"));
+    test_lit!(pfx_rep_range5, prefixes, "a{2}", M("aa"));
+    test_lit!(pfx_rep_range6, prefixes, "a{1,2}", C("a"));
+    test_lit!(pfx_rep_range7, prefixes, "a{2,3}", C("aa"));
+
+    // Test regexes with concatenations.
+    test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)", M("ab"));
+    test_lit!(pfx_cat2, prefixes, "[ab]z", M("az"), M("bz"));
+    test_lit!(
+        pfx_cat3,
+        prefixes,
+        "(?i-u)[ab]z",
+        M("AZ"),
+        M("BZ"),
+        M("aZ"),
+        M("bZ"),
+        M("Az"),
+        M("Bz"),
+        M("az"),
+        M("bz")
+    );
+    test_lit!(
+        pfx_cat4,
+        prefixes,
+        "[ab][yz]",
+        M("ay"),
+        M("by"),
+        M("az"),
+        M("bz")
+    );
+    test_lit!(pfx_cat5, prefixes, "a*b", C("a"), M("b"));
+    test_lit!(pfx_cat6, prefixes, "a*b*c", C("a"), C("b"), M("c"));
+    test_lit!(pfx_cat7, prefixes, "a*b*c+", C("a"), C("b"), C("c"));
+    test_lit!(pfx_cat8, prefixes, "a*b+c", C("a"), C("b"));
+    test_lit!(pfx_cat9, prefixes, "a*b+c*", C("a"), C("b"));
+    test_lit!(pfx_cat10, prefixes, "ab*", C("ab"), M("a"));
+    test_lit!(pfx_cat11, prefixes, "ab*c", C("ab"), M("ac"));
+    test_lit!(pfx_cat12, prefixes, "ab+", C("ab"));
+    test_lit!(pfx_cat13, prefixes, "ab+c", C("ab"));
+    test_lit!(pfx_cat14, prefixes, "a^", C("a"));
+    test_lit!(pfx_cat15, prefixes, "$a");
+    test_lit!(pfx_cat16, prefixes, r"ab*c", C("ab"), M("ac"));
+    test_lit!(pfx_cat17, prefixes, r"ab+c", C("ab"));
+    test_lit!(pfx_cat18, prefixes, r"z*azb", C("z"), M("azb"));
+    test_lit!(pfx_cat19, prefixes, "a.z", C("a"));
+
+    // Test regexes with alternations.
+    test_lit!(pfx_alt1, prefixes, "a|b", M("a"), M("b"));
+    test_lit!(pfx_alt2, prefixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+    test_lit!(pfx_alt3, prefixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+    test_lit!(pfx_alt4, prefixes, "a|b*");
+    test_lit!(pfx_alt5, prefixes, "a|b+", M("a"), C("b"));
+    test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)");
+    test_lit!(
+        pfx_alt7,
+        prefixes,
+        "(a|b)*c|(a|ab)*c",
+        C("a"),
+        C("b"),
+        M("c"),
+        C("a"),
+        C("ab"),
+        M("c")
+    );
+    test_lit!(pfx_alt8, prefixes, "a*b|c", C("a"), M("b"), M("c"));
+
+    // Test regexes with empty assertions.
+    test_lit!(pfx_empty1, prefixes, "^a", M("a"));
+    test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
+    test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
+    test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
+
+    // Make sure some curious regexes have no prefixes.
+    test_lit!(pfx_nothing1, prefixes, ".");
+    test_lit!(pfx_nothing2, prefixes, "(?s).");
+    test_lit!(pfx_nothing3, prefixes, "^");
+    test_lit!(pfx_nothing4, prefixes, "$");
+    test_lit!(pfx_nothing6, prefixes, "(?m)$");
+    test_lit!(pfx_nothing7, prefixes, r"\b");
+    test_lit!(pfx_nothing8, prefixes, r"\B");
+
+    // Test a few regexes that defeat any prefix literal detection.
+    test_lit!(pfx_defeated1, prefixes, ".a");
+    test_lit!(pfx_defeated2, prefixes, "(?s).a");
+    test_lit!(pfx_defeated3, prefixes, "a*b*c*");
+    test_lit!(pfx_defeated4, prefixes, "a|.");
+    test_lit!(pfx_defeated5, prefixes, ".|a");
+    test_lit!(pfx_defeated6, prefixes, "a|^");
+    test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))");
+    test_lit!(pfx_defeated8, prefixes, "$a");
+    test_lit!(pfx_defeated9, prefixes, "(?m)$a");
+    test_lit!(pfx_defeated10, prefixes, r"\ba");
+    test_lit!(pfx_defeated11, prefixes, r"\Ba");
+    test_lit!(pfx_defeated12, prefixes, "^*a");
+    test_lit!(pfx_defeated13, prefixes, "^+a");
+
+    test_lit!(
+        pfx_crazy1,
+        prefixes,
+        r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+        C("Mo\\'am"),
+        C("Mu\\'am"),
+        C("Moam"),
+        C("Muam")
+    );
+
+    // ************************************************************************
+    // Tests for quitting prefix literal search.
+    // ************************************************************************
+
+    macro_rules! test_exhausted {
+        ($name:ident, $which:ident, $re:expr) => {
+            test_exhausted!($name, $which, $re,);
+        };
+        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
+            #[test]
+            fn $name() {
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let mut lits = Literals::empty();
+                lits.set_limit_size(20).set_limit_class(10);
+                $which(&mut lits, &expr);
+                assert_lit_eq!(Unicode, lits, $($lit),*);
+
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let mut lits = Literals::empty();
+                lits.set_limit_size(20).set_limit_class(10);
+                $which(&mut lits, &expr);
+                assert_lit_eq!(Bytes, lits, $($lit),*);
+            }
+        };
+    }
+
+    // These tests use a much lower limit than the default so that we can
+    // write test cases of reasonable size.
+    test_exhausted!(pfx_exhausted1, prefixes, "[a-z]");
+    test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A");
+    test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z", C("A"));
+    test_exhausted!(
+        pfx_exhausted4,
+        prefixes,
+        "(?i-u)foobar",
+        C("FO"),
+        C("fO"),
+        C("Fo"),
+        C("fo")
+    );
+    test_exhausted!(
+        pfx_exhausted5,
+        prefixes,
+        "(?:ab){100}",
+        C("abababababababababab")
+    );
+    test_exhausted!(
+        pfx_exhausted6,
+        prefixes,
+        "(?:(?:ab){100})*cd",
+        C("ababababab"),
+        M("cd")
+    );
+    test_exhausted!(
+        pfx_exhausted7,
+        prefixes,
+        "z(?:(?:ab){100})*cd",
+        C("zababababab"),
+        M("zcd")
+    );
+    test_exhausted!(
+        pfx_exhausted8,
+        prefixes,
+        "aaaaaaaaaaaaaaaaaaaaz",
+        C("aaaaaaaaaaaaaaaaaaaa")
+    );
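+
+    // A direct sketch (hypothetical test name) of what `test_exhausted!`
+    // expands to for one case, using the `prefixes` helper defined earlier
+    // in this module: the lowered size limit forces extraction to give up
+    // and mark the extracted literal as cut.
+    #[test]
+    fn pfx_exhausted_expanded_sketch() {
+        let expr = ParserBuilder::new().build().parse("(?:ab){100}").unwrap();
+        let mut lits = Literals::empty();
+        lits.set_limit_size(20).set_limit_class(10);
+        prefixes(&mut lits, &expr);
+        assert_eq!(1, lits.literals().len());
+        assert!(lits.literals()[0].is_cut());
+    }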
+
+    // ************************************************************************
+    // Tests for suffix literal extraction.
+    // ************************************************************************
+
+    // Elementary tests.
+    test_lit!(sfx_one_lit1, suffixes, "a", M("a"));
+    test_lit!(sfx_one_lit2, suffixes, "abc", M("abc"));
+    test_lit!(sfx_one_lit3, suffixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+    #[cfg(feature = "unicode-case")]
+    test_lit!(sfx_one_lit4, suffixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(sfx_class1, suffixes, "[1-4]", M("1"), M("2"), M("3"), M("4"));
+    test_lit!(
+        sfx_class2,
+        suffixes,
+        "(?u)[☃Ⅰ]",
+        M("\\xe2\\x85\\xa0"),
+        M("\\xe2\\x98\\x83")
+    );
+    #[cfg(feature = "unicode-case")]
+    test_lit!(
+        sfx_class3,
+        suffixes,
+        "(?ui)[☃Ⅰ]",
+        M("\\xe2\\x85\\xa0"),
+        M("\\xe2\\x85\\xb0"),
+        M("\\xe2\\x98\\x83")
+    );
+    test_lit!(sfx_one_lit_casei1, suffixes, "(?i-u)a", M("A"), M("a"));
+    test_lit!(
+        sfx_one_lit_casei2,
+        suffixes,
+        "(?i-u)abc",
+        M("ABC"),
+        M("ABc"),
+        M("AbC"),
+        M("Abc"),
+        M("aBC"),
+        M("aBc"),
+        M("abC"),
+        M("abc")
+    );
+    test_lit!(sfx_group1, suffixes, "(a)", M("a"));
+    test_lit!(sfx_rep_zero_or_one1, suffixes, "a?");
+    test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?");
+    test_lit!(sfx_rep_zero_or_more1, suffixes, "a*");
+    test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*");
+    test_lit!(sfx_rep_one_or_more1, suffixes, "a+", C("a"));
+    test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+", C("abc"));
+    test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+", C("a"));
+    test_lit!(sfx_rep_range1, suffixes, "a{0}");
+    test_lit!(sfx_rep_range2, suffixes, "a{0,}");
+    test_lit!(sfx_rep_range3, suffixes, "a{0,1}");
+    test_lit!(sfx_rep_range4, suffixes, "a{1}", M("a"));
+    test_lit!(sfx_rep_range5, suffixes, "a{2}", M("aa"));
+    test_lit!(sfx_rep_range6, suffixes, "a{1,2}", C("a"));
+    test_lit!(sfx_rep_range7, suffixes, "a{2,3}", C("aa"));
+
+    // Test regexes with concatenations.
+    test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)", M("ab"));
+    test_lit!(sfx_cat2, suffixes, "[ab]z", M("az"), M("bz"));
+    test_lit!(
+        sfx_cat3,
+        suffixes,
+        "(?i-u)[ab]z",
+        M("AZ"),
+        M("Az"),
+        M("BZ"),
+        M("Bz"),
+        M("aZ"),
+        M("az"),
+        M("bZ"),
+        M("bz")
+    );
+    test_lit!(
+        sfx_cat4,
+        suffixes,
+        "[ab][yz]",
+        M("ay"),
+        M("az"),
+        M("by"),
+        M("bz")
+    );
+    test_lit!(sfx_cat5, suffixes, "a*b", C("ab"), M("b"));
+    test_lit!(sfx_cat6, suffixes, "a*b*c", C("bc"), C("ac"), M("c"));
+    test_lit!(sfx_cat7, suffixes, "a*b*c+", C("c"));
+    test_lit!(sfx_cat8, suffixes, "a*b+c", C("bc"));
+    test_lit!(sfx_cat9, suffixes, "a*b+c*", C("c"), C("b"));
+    test_lit!(sfx_cat10, suffixes, "ab*", C("b"), M("a"));
+    test_lit!(sfx_cat11, suffixes, "ab*c", C("bc"), M("ac"));
+    test_lit!(sfx_cat12, suffixes, "ab+", C("b"));
+    test_lit!(sfx_cat13, suffixes, "ab+c", C("bc"));
+    test_lit!(sfx_cat14, suffixes, "a^");
+    test_lit!(sfx_cat15, suffixes, "$a", C("a"));
+    test_lit!(sfx_cat16, suffixes, r"ab*c", C("bc"), M("ac"));
+    test_lit!(sfx_cat17, suffixes, r"ab+c", C("bc"));
+    test_lit!(sfx_cat18, suffixes, r"z*azb", C("zazb"), M("azb"));
+    test_lit!(sfx_cat19, suffixes, "a.z", C("z"));
+
+    // Test regexes with alternations.
+    test_lit!(sfx_alt1, suffixes, "a|b", M("a"), M("b"));
+    test_lit!(sfx_alt2, suffixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+    test_lit!(sfx_alt3, suffixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+    test_lit!(sfx_alt4, suffixes, "a|b*");
+    test_lit!(sfx_alt5, suffixes, "a|b+", M("a"), C("b"));
+    test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)");
+    test_lit!(
+        sfx_alt7,
+        suffixes,
+        "(a|b)*c|(a|ab)*c",
+        C("ac"),
+        C("bc"),
+        M("c"),
+        C("ac"),
+        C("abc"),
+        M("c")
+    );
+    test_lit!(sfx_alt8, suffixes, "a*b|c", C("ab"), M("b"), M("c"));
+
+    // Test regexes with empty assertions.
+    test_lit!(sfx_empty1, suffixes, "a$", M("a"));
+    test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));
+
+    // Make sure some curious regexes have no suffixes.
+    test_lit!(sfx_nothing1, suffixes, ".");
+    test_lit!(sfx_nothing2, suffixes, "(?s).");
+    test_lit!(sfx_nothing3, suffixes, "^");
+    test_lit!(sfx_nothing4, suffixes, "$");
+    test_lit!(sfx_nothing6, suffixes, "(?m)$");
+    test_lit!(sfx_nothing7, suffixes, r"\b");
+    test_lit!(sfx_nothing8, suffixes, r"\B");
+
+    // Test a few regexes that defeat any suffix literal detection.
+    test_lit!(sfx_defeated1, suffixes, "a.");
+    test_lit!(sfx_defeated2, suffixes, "(?s)a.");
+    test_lit!(sfx_defeated3, suffixes, "a*b*c*");
+    test_lit!(sfx_defeated4, suffixes, "a|.");
+    test_lit!(sfx_defeated5, suffixes, ".|a");
+    test_lit!(sfx_defeated6, suffixes, "a|^");
+    test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c)).");
+    test_lit!(sfx_defeated8, suffixes, "a^");
+    test_lit!(sfx_defeated9, suffixes, "(?m)a$");
+    test_lit!(sfx_defeated10, suffixes, r"a\b");
+    test_lit!(sfx_defeated11, suffixes, r"a\B");
+    test_lit!(sfx_defeated12, suffixes, "a^*");
+    test_lit!(sfx_defeated13, suffixes, "a^+");
+
+    // These tests use a much lower limit than the default so that we can
+    // write test cases of reasonable size.
+    test_exhausted!(sfx_exhausted1, suffixes, "[a-z]");
+    test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*");
+    test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z", C("Z"));
+    test_exhausted!(
+        sfx_exhausted4,
+        suffixes,
+        "(?i-u)foobar",
+        C("AR"),
+        C("Ar"),
+        C("aR"),
+        C("ar")
+    );
+    test_exhausted!(
+        sfx_exhausted5,
+        suffixes,
+        "(?:ab){100}",
+        C("abababababababababab")
+    );
+    test_exhausted!(
+        sfx_exhausted6,
+        suffixes,
+        "cd(?:(?:ab){100})*",
+        C("ababababab"),
+        M("cd")
+    );
+    test_exhausted!(
+        sfx_exhausted7,
+        suffixes,
+        "cd(?:(?:ab){100})*z",
+        C("abababababz"),
+        M("cdz")
+    );
+    test_exhausted!(
+        sfx_exhausted8,
+        suffixes,
+        "zaaaaaaaaaaaaaaaaaaaa",
+        C("aaaaaaaaaaaaaaaaaaaa")
+    );
+
+    // ************************************************************************
+    // Tests for generating unambiguous literal sets.
+    // ************************************************************************
+
+    macro_rules! test_unamb {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> = $given
+                    .into_iter()
+                    .map(|ul| {
+                        let cut = ul.is_cut();
+                        Literal { v: ul.v.into_bytes(), cut: cut }
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.unambiguous_prefixes();
+                assert_eq!($expected, escape_lits(got.literals()));
+            }
+        };
+    }
+
+    test_unamb!(unambiguous1, vec![M("z"), M("azb")], vec![C("a"), C("z")]);
+    test_unamb!(
+        unambiguous2,
+        vec![M("zaaaaaa"), M("aa")],
+        vec![C("aa"), C("z")]
+    );
+    test_unamb!(
+        unambiguous3,
+        vec![M("Sherlock"), M("Watson")],
+        vec![M("Sherlock"), M("Watson")]
+    );
+    test_unamb!(unambiguous4, vec![M("abc"), M("bc")], vec![C("a"), C("bc")]);
+    test_unamb!(unambiguous5, vec![M("bc"), M("abc")], vec![C("a"), C("bc")]);
+    test_unamb!(unambiguous6, vec![M("a"), M("aa")], vec![C("a")]);
+    test_unamb!(unambiguous7, vec![M("aa"), M("a")], vec![C("a")]);
+    test_unamb!(unambiguous8, vec![M("ab"), M("a")], vec![C("a")]);
+    test_unamb!(
+        unambiguous9,
+        vec![M("ac"), M("bc"), M("c"), M("ac"), M("abc"), M("c")],
+        vec![C("a"), C("b"), C("c")]
+    );
+    test_unamb!(
+        unambiguous10,
+        vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")],
+        vec![C("Mo"), C("Mu")]
+    );
+    test_unamb!(
+        unambiguous11,
+        vec![M("zazb"), M("azb")],
+        vec![C("a"), C("z")]
+    );
+    test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]);
+    test_unamb!(
+        unambiguous13,
+        vec![M("ABCX"), M("CDAX"), M("BCX")],
+        vec![C("A"), C("BCX"), C("CD")]
+    );
+    test_unamb!(
+        unambiguous14,
+        vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")],
+        vec![M("DSX"), C("I"), C("MGX"), C("MV")]
+    );
+    test_unamb!(
+        unambiguous15,
+        vec![M("IMG_"), M("MG_"), M("CIMG")],
+        vec![C("C"), C("I"), C("MG_")]
+    );
+
+    // ************************************************************************
+    // Tests for suffix trimming.
+    // ************************************************************************
+    macro_rules! test_trim {
+        ($name:ident, $trim:expr, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> = $given
+                    .into_iter()
+                    .map(|ul| {
+                        let cut = ul.is_cut();
+                        Literal { v: ul.v.into_bytes(), cut: cut }
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.trim_suffix($trim).unwrap();
+                assert_eq!($expected, escape_lits(got.literals()));
+            }
+        };
+    }
+
+    test_trim!(trim1, 1, vec![M("ab"), M("yz")], vec![C("a"), C("y")]);
+    test_trim!(trim2, 1, vec![M("abc"), M("abd")], vec![C("ab")]);
+    test_trim!(trim3, 2, vec![M("abc"), M("abd")], vec![C("a")]);
+    test_trim!(trim4, 2, vec![M("abc"), M("ghij")], vec![C("a"), C("gh")]);
+
+    // ************************************************************************
+    // Tests for longest common prefix.
+    // ************************************************************************
+
+    macro_rules! test_lcp {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> = $given
+                    .into_iter()
+                    .map(|s: &str| Literal {
+                        v: s.to_owned().into_bytes(),
+                        cut: false,
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.longest_common_prefix();
+                assert_eq!($expected, escape_bytes(got));
+            }
+        };
+    }
+
+    test_lcp!(lcp1, vec!["a"], "a");
+    test_lcp!(lcp2, vec![], "");
+    test_lcp!(lcp3, vec!["a", "b"], "");
+    test_lcp!(lcp4, vec!["ab", "ab"], "ab");
+    test_lcp!(lcp5, vec!["ab", "a"], "a");
+    test_lcp!(lcp6, vec!["a", "ab"], "a");
+    test_lcp!(lcp7, vec!["ab", "b"], "");
+    test_lcp!(lcp8, vec!["b", "ab"], "");
+    test_lcp!(lcp9, vec!["foobar", "foobaz"], "fooba");
+    test_lcp!(lcp10, vec!["foobar", "foobaz", "a"], "");
+    test_lcp!(lcp11, vec!["a", "foobar", "foobaz"], "");
+    test_lcp!(lcp12, vec!["foo", "flub", "flab", "floo"], "f");
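+
+    // A direct sketch (hypothetical test name) of the same computation
+    // without the macro, using this module's `create_lits` and
+    // `escape_bytes` helpers.
+    #[test]
+    fn lcp_direct_sketch() {
+        let lits = create_lits(vec![
+            Literal { v: b"foobar".to_vec(), cut: false },
+            Literal { v: b"foobaz".to_vec(), cut: false },
+        ]);
+        assert_eq!("fooba", escape_bytes(lits.longest_common_prefix()));
+    }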
+
+    // ************************************************************************
+    // Tests for longest common suffix.
+    // ************************************************************************
+
+    macro_rules! test_lcs {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> = $given
+                    .into_iter()
+                    .map(|s: &str| Literal {
+                        v: s.to_owned().into_bytes(),
+                        cut: false,
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.longest_common_suffix();
+                assert_eq!($expected, escape_bytes(got));
+            }
+        };
+    }
+
+    test_lcs!(lcs1, vec!["a"], "a");
+    test_lcs!(lcs2, vec![], "");
+    test_lcs!(lcs3, vec!["a", "b"], "");
+    test_lcs!(lcs4, vec!["ab", "ab"], "ab");
+    test_lcs!(lcs5, vec!["ab", "a"], "");
+    test_lcs!(lcs6, vec!["a", "ab"], "");
+    test_lcs!(lcs7, vec!["ab", "b"], "b");
+    test_lcs!(lcs8, vec!["b", "ab"], "b");
+    test_lcs!(lcs9, vec!["barfoo", "bazfoo"], "foo");
+    test_lcs!(lcs10, vec!["barfoo", "bazfoo", "a"], "");
+    test_lcs!(lcs11, vec!["a", "barfoo", "bazfoo"], "");
+    test_lcs!(lcs12, vec!["flub", "bub", "boob", "dub"], "b");
+}
diff --git a/src/hir/mod.rs b/src/hir/mod.rs
new file mode 100644
index 0000000..a2f166c
--- /dev/null
+++ b/src/hir/mod.rs
@@ -0,0 +1,2292 @@
+/*!
+Defines a high-level intermediate representation for regular expressions.
+*/
+use std::char;
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+use std::u8;
+
+use ast::Span;
+use hir::interval::{Interval, IntervalSet, IntervalSetIter};
+use unicode;
+
+pub use hir::visitor::{visit, Visitor};
+pub use unicode::CaseFoldError;
+
+mod interval;
+pub mod literal;
+pub mod print;
+pub mod translate;
+mod visitor;
+
+/// An error that can occur while translating an `Ast` to a `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error.
+    kind: ErrorKind,
+    /// The original pattern that the translator's Ast was parsed from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error, derived from the Ast given to the translator.
+    span: Span,
+}
+
+impl Error {
+    /// Return the type of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+}
+
+/// The type of an error that occurred while building an `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// This error occurs when a Unicode feature is used when Unicode
+    /// support is disabled. For example `(?-u:\pL)` would trigger this error.
+    UnicodeNotAllowed,
+    /// This error occurs when translating a pattern that could match a byte
+    /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
+    InvalidUtf8,
+    /// This occurs when an unrecognized Unicode property name could not
+    /// be found.
+    UnicodePropertyNotFound,
+    /// This occurs when an unrecognized Unicode property value could not
+    /// be found.
+    UnicodePropertyValueNotFound,
+    /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
+    /// `\d`) could not be found. This can occur when the `unicode-perl`
+    /// crate feature is not enabled.
+    UnicodePerlClassNotFound,
+    /// This occurs when the Unicode simple case mapping tables are not
+    /// available, and the regular expression required Unicode aware case
+    /// insensitivity.
+    UnicodeCaseUnavailable,
+    /// This occurs when the translator attempts to construct a character class
+    /// that is empty.
+    ///
+    /// Note that this restriction in the translator may be removed in the
+    /// future.
+    EmptyClassNotAllowed,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl ErrorKind {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match *self {
+            UnicodeNotAllowed => "Unicode not allowed here",
+            InvalidUtf8 => "pattern can match invalid UTF-8",
+            UnicodePropertyNotFound => "Unicode property not found",
+            UnicodePropertyValueNotFound => "Unicode property value not found",
+            UnicodePerlClassNotFound => {
+                "Unicode-aware Perl class not found \
+                 (make sure the unicode-perl feature is enabled)"
+            }
+            UnicodeCaseUnavailable => {
+                "Unicode-aware case insensitivity matching is not available \
+                 (make sure the unicode-case feature is enabled)"
+            }
+            EmptyClassNotAllowed => "empty character classes are not allowed",
+            __Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        self.kind.description()
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // TODO: Remove this on the next breaking semver release.
+        #[allow(deprecated)]
+        f.write_str(self.description())
+    }
+}
+
+/// A high-level intermediate representation (HIR) for a regular expression.
+///
+/// The HIR of a regular expression represents an intermediate step between its
+/// abstract syntax (a structured description of the concrete syntax) and
+/// compiled byte codes. The purpose of HIR is to make regular expressions
+/// easier to analyze. In particular, the AST is much more complex than the
+/// HIR. For example, while an AST supports arbitrarily nested character
+/// classes, the HIR will flatten all nested classes into a single set. The HIR
+/// will also "compile away" every flag present in the concrete syntax. For
+/// example, users of HIR expressions never need to worry about case folding;
+/// it is handled automatically by the translator (e.g., by translating `(?i)A`
+/// to `[aA]`).
+///
+/// If the HIR was produced by a translator that disallows invalid UTF-8, then
+/// the HIR is guaranteed to match UTF-8 exclusively.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the HIR.
+///
+/// The specific type of an HIR expression can be accessed via its `kind`
+/// or `into_kind` methods. This extra level of indirection exists for two
+/// reasons:
+///
+/// 1. Construction of an HIR expression *must* use the constructor methods
+///    on this `Hir` type instead of building the `HirKind` values directly.
+///    This permits construction to enforce invariants like "concatenations
+///    always consist of two or more sub-expressions."
+/// 2. Every HIR expression contains attributes that are defined inductively,
+///    and can be computed cheaply during the construction process. For
+///    example, one such attribute is whether the expression must match at the
+///    beginning of the text.
+///
+/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
+/// expression pattern string, and uses constant stack space and heap space
+/// proportional to the size of the `Hir`.
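+///
+/// A small usage sketch (assuming the crate-level `Parser` re-export) of
+/// obtaining an `Hir` and inspecting its kind:
+///
+/// ```
+/// use regex_syntax::Parser;
+/// use regex_syntax::hir::HirKind;
+///
+/// let hir = Parser::new().parse("a|b").unwrap();
+/// match *hir.kind() {
+///     HirKind::Alternation(ref exprs) => assert_eq!(2, exprs.len()),
+///     _ => unreachable!("`a|b` translates to an alternation"),
+/// }
+/// ```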
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Hir {
+    /// The underlying HIR kind.
+    kind: HirKind,
+    /// Analysis info about this HIR, computed during construction.
+    info: HirInfo,
+}
+
+/// The kind of an arbitrary `Hir` expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+    /// The empty regular expression, which matches everything, including the
+    /// empty string.
+    Empty,
+    /// A single literal character that matches exactly this character.
+    Literal(Literal),
+    /// A single character class that matches any of the characters in the
+    /// class. A class can either consist of Unicode scalar values as
+    /// characters, or it can use bytes.
+    Class(Class),
+    /// An anchor assertion. An anchor assertion match always has zero length.
+    Anchor(Anchor),
+    /// A word boundary assertion, which may or may not be Unicode aware. A
+    /// word boundary assertion match always has zero length.
+    WordBoundary(WordBoundary),
+    /// A repetition operation applied to a child expression.
+    Repetition(Repetition),
+    /// A possibly capturing group, which contains a child expression.
+    Group(Group),
+    /// A concatenation of expressions. A concatenation always has at least two
+    /// child expressions.
+    ///
+    /// A concatenation matches only if each of its child expressions matches
+    /// one after the other.
+    Concat(Vec<Hir>),
+    /// An alternation of expressions. An alternation always has at least two
+    /// child expressions.
+    ///
+    /// An alternation matches only if at least one of its child expressions
+    /// matches. If multiple expressions match, then the leftmost is preferred.
+    Alternation(Vec<Hir>),
+}
+
+impl Hir {
+    /// Returns a reference to the underlying HIR kind.
+    pub fn kind(&self) -> &HirKind {
+        &self.kind
+    }
+
+    /// Consumes ownership of this HIR expression and returns its underlying
+    /// `HirKind`.
+    pub fn into_kind(mut self) -> HirKind {
+        use std::mem;
+        mem::replace(&mut self.kind, HirKind::Empty)
+    }
+
+    /// Returns an empty HIR expression.
+    ///
+    /// An empty HIR expression always matches, including the empty string.
+    pub fn empty() -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_line_anchored_start(false);
+        info.set_line_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        info.set_literal(true);
+        info.set_alternation_literal(true);
+        Hir { kind: HirKind::Empty, info: info }
+    }
+
+    /// Creates a literal HIR expression.
+    ///
+    /// If the given literal has a `Byte` variant with an ASCII byte, then this
+    /// method panics. This enforces the invariant that `Byte` variants are
+    /// only used to express matching of invalid UTF-8.
+    pub fn literal(lit: Literal) -> Hir {
+        if let Literal::Byte(b) = lit {
+            assert!(b > 0x7F);
+        }
+
+        let mut info = HirInfo::new();
+        info.set_always_utf8(lit.is_unicode());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_line_anchored_start(false);
+        info.set_line_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        info.set_literal(true);
+        info.set_alternation_literal(true);
+        Hir { kind: HirKind::Literal(lit), info: info }
+    }
+
+    /// Creates a class HIR expression.
+    pub fn class(class: Class) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(class.is_always_utf8());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_line_anchored_start(false);
+        info.set_line_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        info.set_literal(false);
+        info.set_alternation_literal(false);
+        Hir { kind: HirKind::Class(class), info: info }
+    }
+
+    /// Creates an anchor assertion HIR expression.
+    pub fn anchor(anchor: Anchor) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_line_anchored_start(false);
+        info.set_line_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        info.set_literal(false);
+        info.set_alternation_literal(false);
+        if let Anchor::StartText = anchor {
+            info.set_anchored_start(true);
+            info.set_line_anchored_start(true);
+            info.set_any_anchored_start(true);
+        }
+        if let Anchor::EndText = anchor {
+            info.set_anchored_end(true);
+            info.set_line_anchored_end(true);
+            info.set_any_anchored_end(true);
+        }
+        if let Anchor::StartLine = anchor {
+            info.set_line_anchored_start(true);
+        }
+        if let Anchor::EndLine = anchor {
+            info.set_line_anchored_end(true);
+        }
+        Hir { kind: HirKind::Anchor(anchor), info: info }
+    }
+
+    /// Creates a word boundary assertion HIR expression.
+    pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_line_anchored_start(false);
+        info.set_line_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_literal(false);
+        info.set_alternation_literal(false);
+        // A negated word boundary matches the empty string, but a normal
+        // word boundary does not!
+        info.set_match_empty(word_boundary.is_negated());
+        // Negated ASCII word boundaries can match invalid UTF-8.
+        if let WordBoundary::AsciiNegate = word_boundary {
+            info.set_always_utf8(false);
+        }
+        Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
+    }
+
+    /// Creates a repetition HIR expression.
+    pub fn repetition(rep: Repetition) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(rep.hir.is_always_utf8());
+        info.set_all_assertions(rep.hir.is_all_assertions());
+        // If this operator can match the empty string, then it can never
+        // be anchored.
+        info.set_anchored_start(
+            !rep.is_match_empty() && rep.hir.is_anchored_start(),
+        );
+        info.set_anchored_end(
+            !rep.is_match_empty() && rep.hir.is_anchored_end(),
+        );
+        info.set_line_anchored_start(
+            !rep.is_match_empty() && rep.hir.is_anchored_start(),
+        );
+        info.set_line_anchored_end(
+            !rep.is_match_empty() && rep.hir.is_anchored_end(),
+        );
+        info.set_any_anchored_start(rep.hir.is_any_anchored_start());
+        info.set_any_anchored_end(rep.hir.is_any_anchored_end());
+        info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
+        info.set_literal(false);
+        info.set_alternation_literal(false);
+        Hir { kind: HirKind::Repetition(rep), info: info }
+    }
+
+    /// Creates a group HIR expression.
+    pub fn group(group: Group) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(group.hir.is_always_utf8());
+        info.set_all_assertions(group.hir.is_all_assertions());
+        info.set_anchored_start(group.hir.is_anchored_start());
+        info.set_anchored_end(group.hir.is_anchored_end());
+        info.set_line_anchored_start(group.hir.is_line_anchored_start());
+        info.set_line_anchored_end(group.hir.is_line_anchored_end());
+        info.set_any_anchored_start(group.hir.is_any_anchored_start());
+        info.set_any_anchored_end(group.hir.is_any_anchored_end());
+        info.set_match_empty(group.hir.is_match_empty());
+        info.set_literal(false);
+        info.set_alternation_literal(false);
+        Hir { kind: HirKind::Group(group), info: info }
+    }
+
+    /// Returns the concatenation of the given expressions.
+    ///
+    /// This flattens the concatenation as appropriate.
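+    ///
+    /// A small sketch of building a concatenation of literal expressions:
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Literal};
+    ///
+    /// let concat = Hir::concat(vec![
+    ///     Hir::literal(Literal::Unicode('f')),
+    ///     Hir::literal(Literal::Unicode('o')),
+    ///     Hir::literal(Literal::Unicode('o')),
+    /// ]);
+    /// assert!(concat.is_literal());
+    /// // The `Display` impl prints the HIR back as a pattern string.
+    /// assert_eq!("foo", concat.to_string());
+    /// ```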
+    pub fn concat(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(true);
+                info.set_literal(true);
+                info.set_alternation_literal(true);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x = info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end() || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() && e.is_match_empty();
+                    info.set_match_empty(x);
+
+                    let x = info.is_literal() && e.is_literal();
+                    info.set_literal(x);
+
+                    let x = info.is_alternation_literal()
+                        && e.is_alternation_literal();
+                    info.set_alternation_literal(x);
+                }
+                // Anchored attributes require something slightly more
+                // sophisticated. Normally, WLOG, to determine whether an
+                // expression is anchored to the start, we'd only need to check
+                // the first expression of a concatenation. However,
+                // expressions like `$\b^` are still anchored to the start,
+                // but the first expression in the concatenation *isn't*
+                // anchored to the start. So the "first" expression to look at
+                // is actually one that is either not an assertion or is
+                // specifically the StartText assertion.
+                info.set_anchored_start(
+                    exprs
+                        .iter()
+                        .take_while(|e| {
+                            e.is_anchored_start() || e.is_all_assertions()
+                        })
+                        .any(|e| e.is_anchored_start()),
+                );
+                // Similarly for the end anchor, but in reverse.
+                info.set_anchored_end(
+                    exprs
+                        .iter()
+                        .rev()
+                        .take_while(|e| {
+                            e.is_anchored_end() || e.is_all_assertions()
+                        })
+                        .any(|e| e.is_anchored_end()),
+                );
+                // Repeat the process for line anchors.
+                info.set_line_anchored_start(
+                    exprs
+                        .iter()
+                        .take_while(|e| {
+                            e.is_line_anchored_start() || e.is_all_assertions()
+                        })
+                        .any(|e| e.is_line_anchored_start()),
+                );
+                info.set_line_anchored_end(
+                    exprs
+                        .iter()
+                        .rev()
+                        .take_while(|e| {
+                            e.is_line_anchored_end() || e.is_all_assertions()
+                        })
+                        .any(|e| e.is_line_anchored_end()),
+                );
+                Hir { kind: HirKind::Concat(exprs), info: info }
+            }
+        }
+    }
+
+    /// Returns the alternation of the given expressions.
+    ///
+    /// This flattens the alternation as appropriate.
+    pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_anchored_start(true);
+                info.set_anchored_end(true);
+                info.set_line_anchored_start(true);
+                info.set_line_anchored_end(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(false);
+                info.set_literal(false);
+                info.set_alternation_literal(true);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x = info.is_anchored_start() && e.is_anchored_start();
+                    info.set_anchored_start(x);
+
+                    let x = info.is_anchored_end() && e.is_anchored_end();
+                    info.set_anchored_end(x);
+
+                    let x = info.is_line_anchored_start()
+                        && e.is_line_anchored_start();
+                    info.set_line_anchored_start(x);
+
+                    let x = info.is_line_anchored_end()
+                        && e.is_line_anchored_end();
+                    info.set_line_anchored_end(x);
+
+                    let x = info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end() || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() || e.is_match_empty();
+                    info.set_match_empty(x);
+
+                    let x = info.is_alternation_literal() && e.is_literal();
+                    info.set_alternation_literal(x);
+                }
+                Hir { kind: HirKind::Alternation(exprs), info: info }
+            }
+        }
+    }
+
+    /// Build an HIR expression for `.`.
+    ///
+    /// A `.` expression matches any character except for `\n`. To build an
+    /// expression that matches any character, including `\n`, use the `any`
+    /// method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
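+    ///
+    /// A small sketch comparing this constructor against the parser's
+    /// translation of `.` under the default flags:
+    ///
+    /// ```
+    /// use regex_syntax::Parser;
+    /// use regex_syntax::hir::Hir;
+    ///
+    /// let parsed = Parser::new().parse(".").unwrap();
+    /// assert_eq!(parsed, Hir::dot(false));
+    /// ```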
+    pub fn dot(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+            cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\x09'));
+            cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Build an HIR expression for `(?s).`.
+    ///
+    /// A `(?s).` expression matches any character, including `\n`. To build an
+    /// expression that matches any character except for `\n`, use the
+    /// `dot` method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
+    pub fn any(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Return true if and only if this HIR will always match valid UTF-8.
+    ///
+    /// When this returns false, then it is possible for this HIR expression
+    /// to match invalid UTF-8.
+    pub fn is_always_utf8(&self) -> bool {
+        self.info.is_always_utf8()
+    }
+
+    /// Returns true if and only if this entire HIR expression is made up of
+    /// zero-width assertions.
+    ///
+    /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
+    /// not `^a`.
+    pub fn is_all_assertions(&self) -> bool {
+        self.info.is_all_assertions()
+    }
+
+    /// Return true if and only if this HIR is required to match from the
+    /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
+    /// `^foo|^bar` but not `^foo|bar`.
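+    ///
+    /// A small sketch using the crate-level `Parser`:
+    ///
+    /// ```
+    /// use regex_syntax::Parser;
+    ///
+    /// assert!(Parser::new().parse("^foo|^bar").unwrap().is_anchored_start());
+    /// assert!(!Parser::new().parse("^foo|bar").unwrap().is_anchored_start());
+    /// ```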
+    pub fn is_anchored_start(&self) -> bool {
+        self.info.is_anchored_start()
+    }
+
+    /// Return true if and only if this HIR is required to match at the end
+    /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
+    /// `foo$|bar$` but not `foo$|bar`.
+    pub fn is_anchored_end(&self) -> bool {
+        self.info.is_anchored_end()
+    }
+
+    /// Return true if and only if this HIR is required to match from the
+    /// beginning of text or the beginning of a line. This includes expressions
+    /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `(?m)^foo|^bar`
+    /// but not `^foo|bar` or `(?m)^foo|bar`.
+    ///
+    /// Note that if `is_anchored_start` is `true`, then
+    /// `is_line_anchored_start` will also be `true`. The reverse implication
+    /// is not true. For example, `(?m)^foo` is line anchored, but not
+    /// `is_anchored_start`.
+    pub fn is_line_anchored_start(&self) -> bool {
+        self.info.is_line_anchored_start()
+    }
+
+    /// Return true if and only if this HIR is required to match at the
+    /// end of text or the end of a line. This includes expressions like
+    /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
+    /// but not `foo$|bar` or `(?m)foo$|bar`.
+    ///
+    /// Note that if `is_anchored_end` is `true`, then
+    /// `is_line_anchored_end` will also be `true`. The reverse implication
+    /// is not true. For example, `(?m)foo$` is line anchored, but not
+    /// `is_anchored_end`.
+    pub fn is_line_anchored_end(&self) -> bool {
+        self.info.is_line_anchored_end()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that
+    /// is required to match at the beginning of text. Specifically, this
+    /// returns true if the `^` symbol (when multiline mode is disabled) or the
+    /// `\A` escape appear anywhere in the regex.
+    pub fn is_any_anchored_start(&self) -> bool {
+        self.info.is_any_anchored_start()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that is
+    /// required to match at the end of text. Specifically, this returns true
+    /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
+    /// appear anywhere in the regex.
+    pub fn is_any_anchored_end(&self) -> bool {
+        self.info.is_any_anchored_end()
+    }
+
+    /// Return true if and only if the empty string is part of the language
+    /// matched by this regular expression.
+    ///
+    /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
+    /// but not `a`, `a+` or `\b`.
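+    ///
+    /// A small sketch using the crate-level `Parser`:
+    ///
+    /// ```
+    /// use regex_syntax::Parser;
+    ///
+    /// assert!(Parser::new().parse("a*").unwrap().is_match_empty());
+    /// assert!(!Parser::new().parse("a+").unwrap().is_match_empty());
+    /// ```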
+    pub fn is_match_empty(&self) -> bool {
+        self.info.is_match_empty()
+    }
+
+    /// Return true if and only if this HIR is a simple literal. This is only
+    /// true when this HIR expression is either itself a `Literal` or a
+    /// concatenation of only `Literal`s.
+    ///
+    /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`
+    /// are not (even though they contain sub-expressions that are literals).
+    pub fn is_literal(&self) -> bool {
+        self.info.is_literal()
+    }
+
+    /// Return true if and only if this HIR is either a simple literal or an
+    /// alternation of simple literals. This is only
+    /// true when this HIR expression is either itself a `Literal` or a
+    /// concatenation of only `Literal`s or an alternation of only `Literal`s.
+    ///
+    /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
+    /// literals, but `f+`, `(foo)` and `foo()` are not (even though they
+    /// contain sub-expressions that are literals).
+    pub fn is_alternation_literal(&self) -> bool {
+        self.info.is_alternation_literal()
+    }
+}
+
+impl HirKind {
+    /// Return true if and only if this HIR is the empty regular expression.
+    ///
+    /// Note that this is not defined inductively. That is, it only tests if
+    /// this kind is the `Empty` variant. To get the inductive definition,
+    /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            HirKind::Empty => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if this kind has any (including possibly
+    /// empty) subexpressions.
+    pub fn has_subexprs(&self) -> bool {
+        match *self {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_) => false,
+            HirKind::Group(_)
+            | HirKind::Repetition(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => true,
+        }
+    }
+}
+
+/// Print a display representation of this Hir.
+///
+/// The result of this is a valid regular expression pattern string.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Hir`.
+impl fmt::Display for Hir {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use hir::print::Printer;
+        Printer::new().print(self, f)
+    }
+}
+
+/// The high-level intermediate representation of a literal.
+///
+/// A literal corresponds to a single character, where a character is either
+/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
+/// are preferred whenever possible. In particular, a `Byte` variant is only
+/// ever produced when it could match invalid UTF-8.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Literal {
+    /// A single character represented by a Unicode scalar value.
+    Unicode(char),
+    /// A single character represented by an arbitrary byte.
+    Byte(u8),
+}
+
+impl Literal {
+    /// Returns true if and only if this literal corresponds to a Unicode
+    /// scalar value.
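+    ///
+    /// A small sketch:
+    ///
+    /// ```
+    /// use regex_syntax::hir::Literal;
+    ///
+    /// assert!(Literal::Unicode('☃').is_unicode());
+    /// // ASCII bytes are always valid UTF-8, so they count as Unicode here.
+    /// assert!(Literal::Byte(b'a').is_unicode());
+    /// assert!(!Literal::Byte(0xFF).is_unicode());
+    /// ```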
+    pub fn is_unicode(&self) -> bool {
+        match *self {
+            Literal::Unicode(_) => true,
+            Literal::Byte(b) if b <= 0x7F => true,
+            Literal::Byte(_) => false,
+        }
+    }
+}
+
+/// The high-level intermediate representation of a character class.
+///
+/// A character class corresponds to a set of characters. A character is either
+/// defined by a Unicode scalar value or a byte. Unicode characters are used
+/// by default, while bytes are used when Unicode mode (via the `u` flag) is
+/// disabled.
+///
+/// A character class, regardless of its character type, is represented by a
+/// sequence of non-overlapping non-adjacent ranges of characters.
+///
+/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
+/// be produced even when it exclusively matches valid UTF-8. This is because
+/// a `Bytes` variant represents an intention by the author of the regular
+/// expression to disable Unicode mode, which in turn impacts the semantics of
+/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
+/// match the same set of strings.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A set of characters represented by Unicode scalar values.
+    Unicode(ClassUnicode),
+    /// A set of characters represented by arbitrary bytes (one byte per
+    /// character).
+    Bytes(ClassBytes),
+}
+
+impl Class {
+    /// Apply Unicode simple case folding to this character class, in place.
+    /// The character class will be expanded to include all simple case folded
+    /// character variants.
+    ///
+    /// If this is a byte oriented character class, then this will be limited
+    /// to the ASCII ranges `A-Z` and `a-z`.
+    pub fn case_fold_simple(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.case_fold_simple(),
+            Class::Bytes(ref mut x) => x.case_fold_simple(),
+        }
+    }
+
+    /// Negate this character class in place.
+    ///
+    /// After completion, this character class will contain precisely the
+    /// characters that weren't previously in the class.
+    pub fn negate(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.negate(),
+            Class::Bytes(ref mut x) => x.negate(),
+        }
+    }
+
+    /// Returns true if and only if this character class will only ever match
+    /// valid UTF-8.
+    ///
+    /// A character class can match invalid UTF-8 only when the following
+    /// conditions are met:
+    ///
+    /// 1. The translator was configured to permit generating an expression
+    ///    that can match invalid UTF-8. (By default, this is disabled.)
+    /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
+    ///    syntax or in the parser builder. By default, Unicode mode is
+    ///    enabled.
+    pub fn is_always_utf8(&self) -> bool {
+        match *self {
+            Class::Unicode(_) => true,
+            Class::Bytes(ref x) => x.is_all_ascii(),
+        }
+    }
+}
+
+/// A set of characters represented by Unicode scalar values.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    set: IntervalSet<ClassUnicodeRange>,
+}
+
+impl ClassUnicode {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassUnicode
+    where
+        I: IntoIterator<Item = ClassUnicodeRange>,
+    {
+        ClassUnicode { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassUnicode {
+        ClassUnicode::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassUnicodeRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassUnicodeIter {
+        ClassUnicodeIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassUnicodeRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics when the case mapping data necessary for this
+    /// routine to complete is unavailable. This occurs when the `unicode-case`
+    /// feature is not enabled.
+    ///
+    /// Callers should prefer using `try_case_fold_simple` instead, which will
+    /// return an error instead of panicking.
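+    ///
+    /// A small sketch (assuming the default `unicode-case` feature is
+    /// enabled):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let mut cls = ClassUnicode::new(vec![ClassUnicodeRange::new('A', 'Z')]);
+    /// cls.case_fold_simple();
+    /// // The folded class now also covers the ASCII lowercase letters.
+    /// assert!(cls.ranges().contains(&ClassUnicodeRange::new('a', 'z')));
+    /// ```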
+    pub fn case_fold_simple(&mut self) {
+        self.set
+            .case_fold_simple()
+            .expect("unicode-case feature must be enabled");
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
+    ///
+    /// # Error
+    ///
+    /// This routine returns an error when the case mapping data necessary
+    /// for this routine to complete is unavailable. This occurs when the
+    /// `unicode-case` feature is not enabled.
+    pub fn try_case_fold_simple(
+        &mut self,
+    ) -> result::Result<(), CaseFoldError> {
+        self.set.case_fold_simple()
+    }
+
+    /// Negate this character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
+    /// set, then it will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this character class with the given character class, in place.
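+    ///
+    /// A small sketch showing that overlapping ranges are merged:
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let mut cls = ClassUnicode::new(vec![ClassUnicodeRange::new('a', 'g')]);
+    /// cls.union(&ClassUnicode::new(vec![ClassUnicodeRange::new('d', 'm')]));
+    /// assert_eq!(cls.ranges(), &[ClassUnicodeRange::new('a', 'm')][..]);
+    /// ```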
+    pub fn union(&mut self, other: &ClassUnicode) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this character class with the given character class, in
+    /// place.
+    pub fn intersect(&mut self, other: &ClassUnicode) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given character class from this character class, in place.
+    pub fn difference(&mut self, other: &ClassUnicode) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given character classes, in
+    /// place.
+    ///
+    /// This computes the symmetric difference of two character classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII codepoint.
+    pub fn is_all_ascii(&self) -> bool {
+        self.set.intervals().last().map_or(true, |r| r.end <= '\x7F')
+    }
+}
+
+/// An iterator over all ranges in a Unicode character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
+
+impl<'a> Iterator for ClassUnicodeIter<'a> {
+    type Item = &'a ClassUnicodeRange;
+
+    fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by Unicode scalar values.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassUnicodeRange {
+    start: char,
+    end: char,
+}
+
+impl fmt::Debug for ClassUnicodeRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let start = if !self.start.is_whitespace() && !self.start.is_control()
+        {
+            self.start.to_string()
+        } else {
+            format!("0x{:X}", self.start as u32)
+        };
+        let end = if !self.end.is_whitespace() && !self.end.is_control() {
+            self.end.to_string()
+        } else {
+            format!("0x{:X}", self.end as u32)
+        };
+        f.debug_struct("ClassUnicodeRange")
+            .field("start", &start)
+            .field("end", &end)
+            .finish()
+    }
+}
+
+impl Interval for ClassUnicodeRange {
+    type Bound = char;
+
+    #[inline]
+    fn lower(&self) -> char {
+        self.start
+    }
+    #[inline]
+    fn upper(&self) -> char {
+        self.end
+    }
+    #[inline]
+    fn set_lower(&mut self, bound: char) {
+        self.start = bound;
+    }
+    #[inline]
+    fn set_upper(&mut self, bound: char) {
+        self.end = bound;
+    }
+
+    /// Apply simple case folding to this Unicode scalar value range.
+    ///
+    /// Additional ranges are appended to the given vector. Canonical ordering
+    /// is *not* maintained in the given vector.
+    fn case_fold_simple(
+        &self,
+        ranges: &mut Vec<ClassUnicodeRange>,
+    ) -> Result<(), unicode::CaseFoldError> {
+        if !unicode::contains_simple_case_mapping(self.start, self.end)? {
+            return Ok(());
+        }
+        let start = self.start as u32;
+        let end = (self.end as u32).saturating_add(1);
+        let mut next_simple_cp = None;
+        for cp in (start..end).filter_map(char::from_u32) {
+            if next_simple_cp.map_or(false, |next| cp < next) {
+                continue;
+            }
+            let it = match unicode::simple_fold(cp)? {
+                Ok(it) => it,
+                Err(next) => {
+                    next_simple_cp = next;
+                    continue;
+                }
+            };
+            for cp_folded in it {
+                ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
+            }
+        }
+        Ok(())
+    }
+}
+
+impl ClassUnicodeRange {
+    /// Create a new Unicode scalar value range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: char, end: char) -> ClassUnicodeRange {
+        ClassUnicodeRange::create(start, end)
+    }
+
+    /// Return the start of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> char {
+        self.start
+    }
+
+    /// Return the end of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> char {
+        self.end
+    }
+}
+
+/// A set of characters represented by arbitrary bytes (where one byte
+/// corresponds to one character).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBytes {
+    set: IntervalSet<ClassBytesRange>,
+}
+
+impl ClassBytes {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassBytes
+    where
+        I: IntoIterator<Item = ClassBytesRange>,
+    {
+        ClassBytes { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassBytes {
+        ClassBytes::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassBytesRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassBytesIter {
+        ClassBytesIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassBytesRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// Note that this only applies ASCII case folding, which is limited to the
+    /// characters `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple().expect("ASCII case folding never fails");
+    }
+
+    /// Negate this byte class.
+    ///
+    /// For all `b` where `b` is any byte, if `b` was in this set, then it
+    /// will not be in this set after negation.
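+    ///
+    /// For example (mirroring the byte negation tests below):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassBytes, ClassBytesRange};
+    ///
+    /// let mut cls =
+    ///     ClassBytes::new(vec![ClassBytesRange::new(b'\x01', b'\xFF')]);
+    /// cls.negate();
+    /// // Only the NUL byte remains after negation.
+    /// let ranges: Vec<(u8, u8)> =
+    ///     cls.iter().map(|r| (r.start(), r.end())).collect();
+    /// assert_eq!(vec![(b'\x00', b'\x00')], ranges);
+    /// ```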
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this byte class with the given byte class, in place.
+    pub fn union(&mut self, other: &ClassBytes) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this byte class with the given byte class, in place.
+    pub fn intersect(&mut self, other: &ClassBytes) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given byte class from this byte class, in place.
+    pub fn difference(&mut self, other: &ClassBytes) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given byte classes, in place.
+    ///
+    /// This computes the symmetric difference of two byte classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
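+    ///
+    /// A short sketch, using the same classes as the symmetric difference test
+    /// below:
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassBytes, ClassBytesRange};
+    ///
+    /// let mut cls1 = ClassBytes::new(vec![ClassBytesRange::new(b'a', b'm')]);
+    /// let cls2 = ClassBytes::new(vec![ClassBytesRange::new(b'g', b't')]);
+    /// cls1.symmetric_difference(&cls2);
+    /// // `g-m` is in both classes, so only `a-f` and `n-t` remain.
+    /// let ranges: Vec<(u8, u8)> =
+    ///     cls1.iter().map(|r| (r.start(), r.end())).collect();
+    /// assert_eq!(vec![(b'a', b'f'), (b'n', b't')], ranges);
+    /// ```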
+    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII byte.
+    pub fn is_all_ascii(&self) -> bool {
+        self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
+    }
+}
+
+/// An iterator over all ranges in a byte character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
+
+impl<'a> Iterator for ClassBytesIter<'a> {
+    type Item = &'a ClassBytesRange;
+
+    fn next(&mut self) -> Option<&'a ClassBytesRange> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by arbitrary bytes.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassBytesRange {
+    start: u8,
+    end: u8,
+}
+
+impl Interval for ClassBytesRange {
+    type Bound = u8;
+
+    #[inline]
+    fn lower(&self) -> u8 {
+        self.start
+    }
+    #[inline]
+    fn upper(&self) -> u8 {
+        self.end
+    }
+    #[inline]
+    fn set_lower(&mut self, bound: u8) {
+        self.start = bound;
+    }
+    #[inline]
+    fn set_upper(&mut self, bound: u8) {
+        self.end = bound;
+    }
+
+    /// Apply simple case folding to this byte range. Only ASCII case mappings
+    /// (for a-z) are applied.
+    ///
+    /// Additional ranges are appended to the given vector. Canonical ordering
+    /// is *not* maintained in the given vector.
+    fn case_fold_simple(
+        &self,
+        ranges: &mut Vec<ClassBytesRange>,
+    ) -> Result<(), unicode::CaseFoldError> {
+        if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'a');
+            let upper = cmp::min(self.end, b'z');
+            ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
+        }
+        if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'A');
+            let upper = cmp::min(self.end, b'Z');
+            ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
+        }
+        Ok(())
+    }
+}
+
+impl ClassBytesRange {
+    /// Create a new byte range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: u8, end: u8) -> ClassBytesRange {
+        ClassBytesRange::create(start, end)
+    }
+
+    /// Return the start of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> u8 {
+        self.start
+    }
+
+    /// Return the end of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> u8 {
+        self.end
+    }
+}
+
+impl fmt::Debug for ClassBytesRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut debug = f.debug_struct("ClassBytesRange");
+        if self.start <= 0x7F {
+            debug.field("start", &(self.start as char));
+        } else {
+            debug.field("start", &self.start);
+        }
+        if self.end <= 0x7F {
+            debug.field("end", &(self.end as char));
+        } else {
+            debug.field("end", &self.end);
+        }
+        debug.finish()
+    }
+}
+
+/// The high-level intermediate representation for an anchor assertion.
+///
+/// A matching anchor assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Anchor {
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following a `\n` character.
+    StartLine,
+    /// Match the end of a line or the end of text. Specifically,
+    /// this matches at the end position of the input, or at the position
+    /// immediately preceding a `\n` character.
+    EndLine,
+    /// Match the beginning of text. Specifically, this matches at the starting
+    /// position of the input.
+    StartText,
+    /// Match the end of text. Specifically, this matches at the ending
+    /// position of the input.
+    EndText,
+}
+
+/// The high-level intermediate representation for a word-boundary assertion.
+///
+/// A matching word boundary assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum WordBoundary {
+    /// Match a Unicode-aware word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Unicode,
+    /// Match a Unicode-aware negation of a word boundary.
+    UnicodeNegate,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Ascii,
+    /// Match an ASCII-only negation of a word boundary.
+    AsciiNegate,
+}
+
+impl WordBoundary {
+    /// Returns true if and only if this word boundary assertion is negated.
+    pub fn is_negated(&self) -> bool {
+        match *self {
+            WordBoundary::Unicode | WordBoundary::Ascii => false,
+            WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
+        }
+    }
+}
+
+/// The high-level intermediate representation for a group.
+///
+/// This represents one of three possible group types:
+///
+/// 1. A non-capturing group (e.g., `(?:expr)`).
+/// 2. A capturing group (e.g., `(expr)`).
+/// 3. A named capturing group (e.g., `(?P<name>expr)`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The kind of this group. If it is a capturing group, then the kind
+    /// contains the capture group index (and the name, if it is a named
+    /// group).
+    pub kind: GroupKind,
+    /// The expression inside the capturing group, which may be empty.
+    pub hir: Box<Hir>,
+}
+
+/// The kind of group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// A normal unnamed capturing group.
+    ///
+    /// The value is the capture index of the group.
+    CaptureIndex(u32),
+    /// A named capturing group.
+    CaptureName {
+        /// The name of the group.
+        name: String,
+        /// The capture index of the group.
+        index: u32,
+    },
+    /// A non-capturing group.
+    NonCapturing,
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator permits the repetition of an arbitrary
+/// sub-expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The kind of this repetition operator.
+    pub kind: RepetitionKind,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub greedy: bool,
+    /// The expression being repeated.
+    pub hir: Box<Hir>,
+}
+
+impl Repetition {
+    /// Returns true if and only if this repetition operator makes it possible
+    /// to match the empty string.
+    ///
+    /// Note that this is not defined inductively. For example, while `a*`
+    /// will report `true`, `()+` will not, even though `()` matches the empty
+    /// string and one or more occurrences of something that matches the empty
+    /// string will always match the empty string. In order to get the
+    /// inductive definition, see the corresponding method on
+    /// [`Hir`](struct.Hir.html).
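+    ///
+    /// A small sketch of the non-inductive behavior described above (the
+    /// public `hir` constructors shown in this module are assumed):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Literal, Repetition, RepetitionKind};
+    ///
+    /// // `a*` can match the empty string...
+    /// let star = Repetition {
+    ///     kind: RepetitionKind::ZeroOrMore,
+    ///     greedy: true,
+    ///     hir: Box::new(Hir::literal(Literal::Unicode('a'))),
+    /// };
+    /// assert!(star.is_match_empty());
+    ///
+    /// // ...while `a+`, considered on its own, cannot.
+    /// let plus = Repetition {
+    ///     kind: RepetitionKind::OneOrMore,
+    ///     greedy: true,
+    ///     hir: Box::new(Hir::literal(Literal::Unicode('a'))),
+    /// };
+    /// assert!(!plus.is_match_empty());
+    /// ```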
+    pub fn is_match_empty(&self) -> bool {
+        match self.kind {
+            RepetitionKind::ZeroOrOne => true,
+            RepetitionKind::ZeroOrMore => true,
+            RepetitionKind::OneOrMore => false,
+            RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
+            RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
+            RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
+        }
+    }
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// Matches a sub-expression zero or one times.
+    ZeroOrOne,
+    /// Matches a sub-expression zero or more times.
+    ZeroOrMore,
+    /// Matches a sub-expression one or more times.
+    OneOrMore,
+    /// Matches a sub-expression within a bounded range of times.
+    Range(RepetitionRange),
+}
+
+/// The kind of a counted repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// Matches a sub-expression exactly this many times.
+    Exactly(u32),
+    /// Matches a sub-expression at least this many times.
+    AtLeast(u32),
+    /// Matches a sub-expression at least `m` times and at most `n` times.
+    Bounded(u32, u32),
+}
+
+/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
+impl Drop for Hir {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self.kind() {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_) => return,
+            HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Concat(ref x) if x.is_empty() => return,
+            HirKind::Alternation(ref x) if x.is_empty() => return,
+            _ => {}
+        }
+
+        let mut stack = vec![mem::replace(self, Hir::empty())];
+        while let Some(mut expr) = stack.pop() {
+            match expr.kind {
+                HirKind::Empty
+                | HirKind::Literal(_)
+                | HirKind::Class(_)
+                | HirKind::Anchor(_)
+                | HirKind::WordBoundary(_) => {}
+                HirKind::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
+                }
+                HirKind::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
+                }
+                HirKind::Concat(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+                HirKind::Alternation(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A type that documents various attributes of an HIR expression.
+///
+/// These attributes are typically defined inductively on the HIR.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct HirInfo {
+    /// Represent yes/no questions by a bitfield to conserve space, since
+    /// this is included in every HIR expression.
+    ///
+    /// If more attributes need to be added, it is OK to increase the size of
+    /// this as appropriate.
+    bools: u16,
+}
+
+// A simple macro for defining bitfield accessors/mutators.
+macro_rules! define_bool {
+    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
+        fn $is_fn_name(&self) -> bool {
+            self.bools & (0b1 << $bit) > 0
+        }
+
+        fn $set_fn_name(&mut self, yes: bool) {
+            if yes {
+                self.bools |= 1 << $bit;
+            } else {
+                self.bools &= !(1 << $bit);
+            }
+        }
+    }
+}
+
+impl HirInfo {
+    fn new() -> HirInfo {
+        HirInfo { bools: 0 }
+    }
+
+    define_bool!(0, is_always_utf8, set_always_utf8);
+    define_bool!(1, is_all_assertions, set_all_assertions);
+    define_bool!(2, is_anchored_start, set_anchored_start);
+    define_bool!(3, is_anchored_end, set_anchored_end);
+    define_bool!(4, is_line_anchored_start, set_line_anchored_start);
+    define_bool!(5, is_line_anchored_end, set_line_anchored_end);
+    define_bool!(6, is_any_anchored_start, set_any_anchored_start);
+    define_bool!(7, is_any_anchored_end, set_any_anchored_end);
+    define_bool!(8, is_match_empty, set_match_empty);
+    define_bool!(9, is_literal, set_literal);
+    define_bool!(10, is_alternation_literal, set_alternation_literal);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
+        let ranges: Vec<ClassUnicodeRange> = ranges
+            .iter()
+            .map(|&(s, e)| ClassUnicodeRange::new(s, e))
+            .collect();
+        ClassUnicode::new(ranges)
+    }
+
+    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
+        let ranges: Vec<ClassBytesRange> =
+            ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect();
+        ClassBytes::new(ranges)
+    }
+
+    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect()
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone();
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.difference(cls2);
+        cls_
+    }
+
+    fn usymdifference(
+        cls1: &ClassUnicode,
+        cls2: &ClassUnicode,
+    ) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone();
+        cls_.negate();
+        cls_
+    }
+
+    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect()
+    }
+
+    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone();
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.difference(cls2);
+        cls_
+    }
+
+    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn bnegate(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone();
+        cls_.negate();
+        cls_
+    }
+
+    #[test]
+    fn class_range_canonical_unicode() {
+        let range = ClassUnicodeRange::new('\u{00FF}', '\0');
+        assert_eq!('\0', range.start());
+        assert_eq!('\u{00FF}', range.end());
+    }
+
+    #[test]
+    fn class_range_canonical_bytes() {
+        let range = ClassBytesRange::new(b'\xFF', b'\0');
+        assert_eq!(b'\0', range.start());
+        assert_eq!(b'\xFF', range.end());
+    }
+
+    #[test]
+    fn class_canonicalize_unicode() {
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('a', 'c')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('w', 'y')]);
+        let expected = vec![('w', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[
+            ('c', 'f'),
+            ('a', 'g'),
+            ('d', 'j'),
+            ('a', 'c'),
+            ('m', 'p'),
+            ('l', 's'),
+        ]);
+        let expected = vec![('a', 'j'), ('l', 's')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('u', 'w')]);
+        let expected = vec![('u', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+        let expected = vec![('\x00', '\u{10FFFF}')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = vec![('a', 'b')];
+        assert_eq!(expected, uranges(&cls));
+    }
+
+    #[test]
+    fn class_canonicalize_bytes() {
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
+        let expected = vec![(b'w', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[
+            (b'c', b'f'),
+            (b'a', b'g'),
+            (b'd', b'j'),
+            (b'a', b'c'),
+            (b'm', b'p'),
+            (b'l', b's'),
+        ]);
+        let expected = vec![(b'a', b'j'), (b'l', b's')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
+        let expected = vec![(b'u', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
+        let expected = vec![(b'\x00', b'\xFF')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = vec![(b'a', b'b')];
+        assert_eq!(expected, branges(&cls));
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn class_case_fold_unicode() {
+        let cls = uclass(&[
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ]);
+        let expected = uclass(&[
+            ('A', 'J'),
+            ('L', 'S'),
+            ('a', 'j'),
+            ('l', 's'),
+            ('\u{17F}', '\u{17F}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'Z')]);
+        let expected = uclass(&[
+            ('A', 'Z'),
+            ('a', 'z'),
+            ('\u{17F}', '\u{17F}'),
+            ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('a', 'z')]);
+        let expected = uclass(&[
+            ('A', 'Z'),
+            ('a', 'z'),
+            ('\u{17F}', '\u{17F}'),
+            ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'A'), ('_', '_')]);
+        let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'A'), ('=', '=')]);
+        let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('\x00', '\x10')]);
+        assert_eq!(cls, ucasefold(&cls));
+
+        let cls = uclass(&[('k', 'k')]);
+        let expected =
+            uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('@', '@')]);
+        assert_eq!(cls, ucasefold(&cls));
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled() {
+        let mut cls = uclass(&[
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ]);
+        assert!(cls.try_case_fold_simple().is_err());
+    }
+
+    #[test]
+    #[should_panic]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled_panics() {
+        let mut cls = uclass(&[
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ]);
+        cls.case_fold_simple();
+    }
+
+    #[test]
+    fn class_case_fold_bytes() {
+        let cls = bclass(&[
+            (b'C', b'F'),
+            (b'A', b'G'),
+            (b'D', b'J'),
+            (b'A', b'C'),
+            (b'M', b'P'),
+            (b'L', b'S'),
+            (b'c', b'f'),
+        ]);
+        let expected =
+            bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'Z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
+        let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
+        let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\x10')]);
+        assert_eq!(cls, bcasefold(&cls));
+
+        let cls = bclass(&[(b'k', b'k')]);
+        let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'@', b'@')]);
+        assert_eq!(cls, bcasefold(&cls));
+    }
+
+    #[test]
+    fn class_negate_unicode() {
+        let cls = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = uclass(&[
+            ('\x00', '\x60'),
+            ('\x64', '\x77'),
+            ('\x7B', '\u{10FFFF}'),
+        ]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', 'a')]);
+        let expected = uclass(&[('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\x60')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[]);
+        let expected = uclass(&[('\x00', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls =
+            uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{D7FF}')]);
+        let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{D7FE}')]);
+        let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{D7FF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{E000}')]);
+        assert_eq!(expected, unegate(&cls));
+    }
+
+    #[test]
+    fn class_negate_bytes() {
+        let cls = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = bclass(&[
+            (b'\x00', b'\x60'),
+            (b'\x64', b'\x77'),
+            (b'\x7B', b'\xFF'),
+        ]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'a')]);
+        let expected = bclass(&[(b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'\xFF')]);
+        let expected = bclass(&[(b'\x00', b'\x60')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFF')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[]);
+        let expected = bclass(&[(b'\x00', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
+        let expected = bclass(&[(b'\xFE', b'\xFE')]);
+        assert_eq!(expected, bnegate(&cls));
+    }
+
+    #[test]
+    fn class_union_unicode() {
+        let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
+        let cls2 = uclass(&[('a', 'z')]);
+        let expected = uclass(&[('a', 'z'), ('A', 'C')]);
+        assert_eq!(expected, uunion(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_union_bytes() {
+        let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
+        let cls2 = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
+        assert_eq!(expected, bunion(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_intersect_unicode() {
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('b', 'b')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('b', 'c')]);
+        let expected = uclass(&[('b', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('c', 'd')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('b', 'c')]);
+        let cls2 = uclass(&[('a', 'd')]);
+        let expected = uclass(&[('b', 'c')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
+        let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('h', 'h')]);
+        let expected = uclass(&[('h', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
+        let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
+        let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
+        let expected = uclass(&[('b', 'f')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_intersect_bytes() {
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'b', b'b')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'b', b'c')]);
+        let expected = bclass(&[(b'b', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'c', b'd')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'b', b'c')]);
+        let cls2 = bclass(&[(b'a', b'd')]);
+        let expected = bclass(&[(b'b', b'c')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'h', b'h')]);
+        let expected = bclass(&[(b'h', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
+        let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
+        let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
+        let expected = bclass(&[(b'b', b'f')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_difference_unicode() {
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('b', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('z', 'z')]);
+        let expected = uclass(&[('a', 'y')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('m', 'm')]);
+        let expected = uclass(&[('a', 'l'), ('n', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('a', 'z')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('d', 'v')]);
+        let expected = uclass(&[('a', 'c')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('x', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('x', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'b', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'z', b'z')]);
+        let expected = bclass(&[(b'a', b'y')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'm', b'm')]);
+        let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'd', b'v')]);
+        let expected = bclass(&[(b'a', b'c')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'x', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'x', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_symmetric_difference_unicode() {
+        let cls1 = uclass(&[('a', 'm')]);
+        let cls2 = uclass(&[('g', 't')]);
+        let expected = uclass(&[('a', 'f'), ('n', 't')]);
+        assert_eq!(expected, usymdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_symmetric_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'm')]);
+        let cls2 = bclass(&[(b'g', b't')]);
+        let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
+        assert_eq!(expected, bsymdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    #[should_panic]
+    fn hir_byte_literal_non_ascii() {
+        Hir::literal(Literal::Byte(b'a'));
+    }
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Hir can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let mut expr = Hir::empty();
+            for _ in 0..100 {
+                expr = Hir::group(Group {
+                    kind: GroupKind::NonCapturing,
+                    hir: Box::new(expr),
+                });
+                expr = Hir::repetition(Repetition {
+                    kind: RepetitionKind::ZeroOrOne,
+                    greedy: true,
+                    hir: Box::new(expr),
+                });
+
+                expr = Hir {
+                    kind: HirKind::Concat(vec![expr]),
+                    info: HirInfo::new(),
+                };
+                expr = Hir {
+                    kind: HirKind::Alternation(vec![expr]),
+                    info: HirInfo::new(),
+                };
+            }
+            assert!(!expr.kind.is_empty());
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1 << 10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+}
diff --git a/src/hir/print.rs b/src/hir/print.rs
new file mode 100644
index 0000000..eb44b93
--- /dev/null
+++ b/src/hir/print.rs
@@ -0,0 +1,368 @@
+/*!
+This module provides a regular expression printer for `Hir`.
+*/
+
+use std::fmt;
+
+use hir::visitor::{self, Visitor};
+use hir::{self, Hir, HirKind};
+use is_meta_character;
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder::new()
+    }
+}
+
+impl PrinterBuilder {
+    fn new() -> PrinterBuilder {
+        PrinterBuilder { _priv: () }
+    }
+
+    fn build(&self) -> Printer {
+        Printer { _priv: () }
+    }
+}
+
+/// A printer for a regular expression's high-level intermediate
+/// representation.
+///
+/// A printer converts a high-level intermediate representation (HIR) to a
+/// regular expression pattern string. This particular printer uses constant
+/// stack space and heap space proportional to the size of the HIR.
+///
+/// Since this printer is only using the HIR, the pattern it prints will likely
+/// not resemble the original pattern at all. For example, a pattern like
+/// `\pL` will have its entire class written out.
+///
+/// The purpose of this printer is to provide a means to mutate an HIR and then
+/// build a regular expression from the result of that mutation. (A regex
+/// library could provide a constructor from this HIR explicitly, but that
+/// creates an unnecessary public coupling between the regex library and this
+/// specific HIR representation.)
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Print the given `Hir` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
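+    ///
+    /// A minimal usage sketch with a `&mut String` writer (the crate-level
+    /// `ParserBuilder` is assumed here as the way to obtain an `Hir`, as the
+    /// roundtrip tests below do):
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    /// use regex_syntax::hir::print::Printer;
+    ///
+    /// let hir = ParserBuilder::new().build().parse("a|b").unwrap();
+    /// let mut dst = String::new();
+    /// Printer::new().print(&hir, &mut dst).unwrap();
+    /// assert_eq!("a|b", dst);
+    /// ```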
+    pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
+        visitor::visit(hir, Writer { printer: self, wtr: wtr })
+    }
+}
+
+#[derive(Debug)]
+struct Writer<'p, W> {
+    printer: &'p mut Printer,
+    wtr: W,
+}
+
+impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            HirKind::Empty
+            | HirKind::Repetition(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => {}
+            HirKind::Literal(hir::Literal::Unicode(c)) => {
+                self.write_literal_char(c)?;
+            }
+            HirKind::Literal(hir::Literal::Byte(b)) => {
+                self.write_literal_byte(b)?;
+            }
+            HirKind::Class(hir::Class::Unicode(ref cls)) => {
+                self.wtr.write_str("[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_char(range.start())?;
+                    } else {
+                        self.write_literal_char(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_char(range.end())?;
+                    }
+                }
+                self.wtr.write_str("]")?;
+            }
+            HirKind::Class(hir::Class::Bytes(ref cls)) => {
+                self.wtr.write_str("(?-u:[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_class_byte(range.start())?;
+                    } else {
+                        self.write_literal_class_byte(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_class_byte(range.end())?;
+                    }
+                }
+                self.wtr.write_str("])")?;
+            }
+            HirKind::Anchor(hir::Anchor::StartLine) => {
+                self.wtr.write_str("(?m:^)")?;
+            }
+            HirKind::Anchor(hir::Anchor::EndLine) => {
+                self.wtr.write_str("(?m:$)")?;
+            }
+            HirKind::Anchor(hir::Anchor::StartText) => {
+                self.wtr.write_str(r"\A")?;
+            }
+            HirKind::Anchor(hir::Anchor::EndText) => {
+                self.wtr.write_str(r"\z")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::Unicode) => {
+                self.wtr.write_str(r"\b")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => {
+                self.wtr.write_str(r"\B")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::Ascii) => {
+                self.wtr.write_str(r"(?-u:\b)")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => {
+                self.wtr.write_str(r"(?-u:\B)")?;
+            }
+            HirKind::Group(ref x) => match x.kind {
+                hir::GroupKind::CaptureIndex(_) => {
+                    self.wtr.write_str("(")?;
+                }
+                hir::GroupKind::CaptureName { ref name, .. } => {
+                    write!(self.wtr, "(?P<{}>", name)?;
+                }
+                hir::GroupKind::NonCapturing => {
+                    self.wtr.write_str("(?:")?;
+                }
+            },
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            // Handled during visit_pre
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => {}
+            HirKind::Repetition(ref x) => {
+                match x.kind {
+                    hir::RepetitionKind::ZeroOrOne => {
+                        self.wtr.write_str("?")?;
+                    }
+                    hir::RepetitionKind::ZeroOrMore => {
+                        self.wtr.write_str("*")?;
+                    }
+                    hir::RepetitionKind::OneOrMore => {
+                        self.wtr.write_str("+")?;
+                    }
+                    hir::RepetitionKind::Range(ref x) => match *x {
+                        hir::RepetitionRange::Exactly(m) => {
+                            write!(self.wtr, "{{{}}}", m)?;
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            write!(self.wtr, "{{{},}}", m)?;
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            write!(self.wtr, "{{{},{}}}", m, n)?;
+                        }
+                    },
+                }
+                if !x.greedy {
+                    self.wtr.write_str("?")?;
+                }
+            }
+            HirKind::Group(_) => {
+                self.wtr.write_str(")")?;
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+}
+
+impl<'p, W: fmt::Write> Writer<'p, W> {
+    fn write_literal_char(&mut self, c: char) -> fmt::Result {
+        if is_meta_character(c) {
+            self.wtr.write_str("\\")?;
+        }
+        self.wtr.write_char(c)
+    }
+
+    fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
+        let c = b as char;
+        if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
+            self.write_literal_char(c)
+        } else {
+            write!(self.wtr, "(?-u:\\x{:02X})", b)
+        }
+    }
+
+    fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
+        let c = b as char;
+        if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
+            self.write_literal_char(c)
+        } else {
+            write!(self.wtr, "\\x{:02X}", b)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Printer;
+    use ParserBuilder;
+
+    fn roundtrip(given: &str, expected: &str) {
+        roundtrip_with(|b| b, given, expected);
+    }
+
+    fn roundtrip_bytes(given: &str, expected: &str) {
+        roundtrip_with(|b| b.allow_invalid_utf8(true), given, expected);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
+    where
+        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+        let hir = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&hir, &mut dst).unwrap();
+
+        // Check that the result is actually valid.
+        builder.build().parse(&dst).unwrap();
+
+        assert_eq!(expected, dst);
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a", "a");
+        roundtrip(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"(?-u)\xff", r"(?-u:\xFF)");
+        roundtrip("☃", "☃");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[a]", r"[a]");
+        roundtrip(r"[a-z]", r"[a-z]");
+        roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]");
+        roundtrip(r"[^\x01-\u{10FFFF}]", "[\u{0}]");
+        roundtrip(r"[-]", r"[\-]");
+        roundtrip(r"[☃-⛄]", r"[☃-⛄]");
+
+        roundtrip(r"(?-u)[a]", r"(?-u:[a])");
+        roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])");
+        roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");
+
+        // The following test that the printer escapes meta characters
+        // in character classes.
+        roundtrip(r"[\[]", r"[\[]");
+        roundtrip(r"[Z-_]", r"[Z-_]");
+        roundtrip(r"[Z-_--Z]", r"[\[-_]");
+
+        // The following test that the printer escapes meta characters
+        // in byte oriented character classes.
+        roundtrip_bytes(r"(?-u)[\[]", r"(?-u:[\[])");
+        roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])");
+        roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])");
+    }
+
+    #[test]
+    fn print_anchor() {
+        roundtrip(r"^", r"\A");
+        roundtrip(r"$", r"\z");
+        roundtrip(r"(?m)^", r"(?m:^)");
+        roundtrip(r"(?m)$", r"(?m:$)");
+    }
+
+    #[test]
+    fn print_word_boundary() {
+        roundtrip(r"\b", r"\b");
+        roundtrip(r"\B", r"\B");
+        roundtrip(r"(?-u)\b", r"(?-u:\b)");
+        roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?", "a?");
+        roundtrip("a??", "a??");
+        roundtrip("(?U)a?", "a??");
+
+        roundtrip("a*", "a*");
+        roundtrip("a*?", "a*?");
+        roundtrip("(?U)a*", "a*?");
+
+        roundtrip("a+", "a+");
+        roundtrip("a+?", "a+?");
+        roundtrip("(?U)a+", "a+?");
+
+        roundtrip("a{1}", "a{1}");
+        roundtrip("a{1,}", "a{1,}");
+        roundtrip("a{1,5}", "a{1,5}");
+        roundtrip("a{1}?", "a{1}?");
+        roundtrip("a{1,}?", "a{1,}?");
+        roundtrip("a{1,5}?", "a{1,5}?");
+        roundtrip("(?U)a{1}", "a{1}?");
+        roundtrip("(?U)a{1,}", "a{1,}?");
+        roundtrip("(?U)a{1,5}", "a{1,5}?");
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("()", "()");
+        roundtrip("(?P<foo>)", "(?P<foo>)");
+        roundtrip("(?:)", "(?:)");
+
+        roundtrip("(a)", "(a)");
+        roundtrip("(?P<foo>a)", "(?P<foo>a)");
+        roundtrip("(?:a)", "(?:a)");
+
+        roundtrip("((((a))))", "((((a))))");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("|", "|");
+        roundtrip("||", "||");
+
+        roundtrip("a|b", "a|b");
+        roundtrip("a|b|c", "a|b|c");
+        roundtrip("foo|bar|quux", "foo|bar|quux");
+    }
+}
diff --git a/src/hir/translate.rs b/src/hir/translate.rs
new file mode 100644
index 0000000..2469890
--- /dev/null
+++ b/src/hir/translate.rs
@@ -0,0 +1,3149 @@
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use std::cell::{Cell, RefCell};
+use std::result;
+
+use ast::{self, Ast, Span, Visitor};
+use hir::{self, Error, ErrorKind, Hir};
+use unicode::{self, ClassQuery};
+
+type Result<T> = result::Result<T, Error>;
+
+/// A builder for constructing an AST->HIR translator.
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+    allow_invalid_utf8: bool,
+    flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    fn default() -> TranslatorBuilder {
+        TranslatorBuilder::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default configuration.
+    pub fn new() -> TranslatorBuilder {
+        TranslatorBuilder {
+            allow_invalid_utf8: false,
+            flags: Flags::default(),
+        }
+    }
+
+    /// Build a translator using the current configuration.
+    pub fn build(&self) -> Translator {
+        Translator {
+            stack: RefCell::new(vec![]),
+            flags: Cell::new(self.flags),
+            allow_invalid_utf8: self.allow_invalid_utf8,
+        }
+    }
+
+    /// When enabled, translation will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the translator is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// translator will return an error).
+    ///
+    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
+    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
+    /// the parser to return an error. Namely, a negated ASCII word boundary
+    /// can result in matching positions that aren't valid UTF-8 boundaries.
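+    ///
+    /// A brief sketch of the effect described above (the public
+    /// `ast::parse::Parser` is assumed here to produce the `Ast` that gets
+    /// translated):
+    ///
+    /// ```
+    /// use regex_syntax::ast::parse::Parser;
+    /// use regex_syntax::hir::translate::TranslatorBuilder;
+    ///
+    /// let pattern = r"(?-u:\B)";
+    /// let ast = Parser::new().parse(pattern).unwrap();
+    ///
+    /// // By default, a negated ASCII word boundary is rejected...
+    /// assert!(TranslatorBuilder::new()
+    ///     .build()
+    ///     .translate(pattern, &ast)
+    ///     .is_err());
+    /// // ...but it is accepted once invalid UTF-8 is allowed.
+    /// assert!(TranslatorBuilder::new()
+    ///     .allow_invalid_utf8(true)
+    ///     .build()
+    ///     .translate(pattern, &ast)
+    ///     .is_ok());
+    /// ```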
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.allow_invalid_utf8 = yes;
+        self
+    }
+
+    /// Enable or disable the case insensitive flag (`i`) by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.case_insensitive = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag (`m`) by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.multi_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag (`s`) by
+    /// default.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag (`U`) by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.swap_greed = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.unicode = if yes { None } else { Some(false) };
+        self
+    }
+}
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Translator {
+    /// Our call stack, but on the heap.
+    stack: RefCell<Vec<HirFrame>>,
+    /// The current flag settings.
+    flags: Cell<Flags>,
+    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
+    allow_invalid_utf8: bool,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Translator {
+        TranslatorBuilder::new().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// If there was a problem doing the translation, then an HIR-specific
+    /// error is returned.
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. The translator does not use the pattern string during a
+    /// successful translation; it is only used for error reporting.
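+    ///
+    /// A minimal usage sketch (the parse step assumes the crate's public
+    /// `ast::parse::Parser` API; this example is not part of the upstream
+    /// docs):
+    ///
+    /// ```ignore
+    /// use regex_syntax::ast::parse::Parser;
+    /// use regex_syntax::hir::translate::Translator;
+    ///
+    /// let pattern = r"(?i)a+";
+    /// let ast = Parser::new().parse(pattern).unwrap();
+    /// let hir = Translator::new().translate(pattern, &ast).unwrap();
+    /// // `Hir` implements `Display`, so it can be printed back out as a
+    /// // normalized pattern.
+    /// println!("{}", hir);
+    /// ```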
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        ast::visit(ast, TranslatorI::new(self, pattern))
+    }
+}
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor maintains any state associated with
+/// traversing the Ast itself.
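+///
+/// As an illustrative (non-normative) trace, consider translating `a(b|c)`:
+/// the outer concatenation pushes `Concat`; `a` pushes `Expr(a)`; entering
+/// the group pushes `Group`; the inner alternation pushes `Alternation`,
+/// followed by `Expr(b)` and `Expr(c)`; leaving the alternation pops back
+/// down to (and consumes) the `Alternation` frame and pushes `Expr(b|c)`;
+/// leaving the group pops `Group` and pushes `Expr((b|c))`; finally the
+/// `Concat` frame is consumed, leaving a single `Expr(a(b|c))` on the stack.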
+#[derive(Clone, Debug)]
+enum HirFrame {
+    /// An arbitrary HIR expression. These get pushed whenever we hit a base
+    /// case in the Ast. They get popped after an inductive (i.e., recursive)
+    /// step is complete.
+    Expr(Hir),
+    /// A Unicode character class. This frame is mutated as we descend into
+    /// the Ast of a character class (which is itself its own mini recursive
+    /// structure).
+    ClassUnicode(hir::ClassUnicode),
+    /// A byte-oriented character class. This frame is mutated as we descend
+    /// into the Ast of a character class (which is itself its own mini
+    /// recursive structure).
+    ///
+    /// Byte character classes are created when Unicode mode (`u`) is disabled.
+    /// If `allow_invalid_utf8` is disabled (the default), then a byte
+    /// character class is only permitted to match ASCII text.
+    ClassBytes(hir::ClassBytes),
+    /// This is pushed on to the stack upon first seeing any kind of group,
+    /// indicated by parentheses (including non-capturing groups). It is popped
+    /// upon leaving a group.
+    Group {
+        /// The old active flags when this group was opened.
+        ///
+        /// If this group sets flags, then the new active flags are set to the
+        /// result of merging the old flags with the flags introduced by this
+        /// group. If the group doesn't set any flags, then this is simply
+        /// equivalent to whatever flags were set when the group was opened.
+        ///
+        /// When this group is popped, the active flags should be restored to
+        /// the flags set here.
+        ///
+        /// The "active" flags correspond to whatever flags are set in the
+        /// Translator.
+        old_flags: Flags,
+    },
+    /// This is pushed whenever a concatenation is observed. After visiting
+    /// every sub-expression in the concatenation, the translator's stack is
+    /// popped until it sees a Concat frame.
+    Concat,
+    /// This is pushed whenever an alternation is observed. After visiting
+    /// every sub-expression in the alternation, the translator's stack is
+    /// popped until it sees an Alternation frame.
+    Alternation,
+}
+
+impl HirFrame {
+    /// Assert that the current stack frame is an Hir expression and return it.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Expr(expr) => expr,
+            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
+        }
+    }
+
+    /// Assert that the current stack frame is a Unicode class expression and
+    /// return it.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        match self {
+            HirFrame::ClassUnicode(cls) => cls,
+            _ => panic!(
+                "tried to unwrap Unicode class \
+                 from HirFrame, got: {:?}",
+                self
+            ),
+        }
+    }
+
+    /// Assert that the current stack frame is a byte class expression and
+    /// return it.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        match self {
+            HirFrame::ClassBytes(cls) => cls,
+            _ => panic!(
+                "tried to unwrap byte class \
+                 from HirFrame, got: {:?}",
+                self
+            ),
+        }
+    }
+
+    /// Assert that the current stack frame is a group indicator and return
+    /// its corresponding flags (the flags that were active at the time the
+    /// group was entered).
+    fn unwrap_group(self) -> Flags {
+        match self {
+            HirFrame::Group { old_flags } => old_flags,
+            _ => {
+                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
+            }
+        }
+    }
+}
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+    type Output = Hir;
+    type Err = Error;
+
+    fn finish(self) -> Result<Hir> {
+        // ... otherwise, we should have exactly one HIR on the stack.
+        assert_eq!(self.trans().stack.borrow().len(), 1);
+        Ok(self.pop().unwrap().unwrap_expr())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(_)) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            Ast::Group(ref x) => {
+                let old_flags = x
+                    .flags()
+                    .map(|ast| self.set_flags(ast))
+                    .unwrap_or_else(|| self.flags());
+                self.push(HirFrame::Group { old_flags });
+            }
+            Ast::Concat(ref x) if x.asts.is_empty() => {}
+            Ast::Concat(_) => {
+                self.push(HirFrame::Concat);
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => {}
+            Ast::Alternation(_) => {
+                self.push(HirFrame::Alternation);
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_) => {
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Flags(ref x) => {
+                self.set_flags(&x.flags);
+                // Flags in the AST are generally considered directives and
+                // not actual sub-expressions. However, they can be used in
+                // the concrete syntax like `((?i))`, and we need some kind of
+                // indication of an expression there, and Empty is the correct
+                // choice.
+                //
+                // There can also be things like `(?i)+`, but we rule those out
+                // in the parser. In the future, we might allow them for
+                // consistency's sake.
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Literal(ref x) => {
+                self.push(HirFrame::Expr(self.hir_literal(x)?));
+            }
+            Ast::Dot(span) => {
+                self.push(HirFrame::Expr(self.hir_dot(span)?));
+            }
+            Ast::Assertion(ref x) => {
+                self.push(HirFrame::Expr(self.hir_assertion(x)?));
+            }
+            Ast::Class(ast::Class::Perl(ref x)) => {
+                if self.flags().unicode() {
+                    let cls = self.hir_perl_unicode_class(x)?;
+                    let hcls = hir::Class::Unicode(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                } else {
+                    let cls = self.hir_perl_byte_class(x);
+                    let hcls = hir::Class::Bytes(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                }
+            }
+            Ast::Class(ast::Class::Unicode(ref x)) => {
+                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
+                self.push(HirFrame::Expr(Hir::class(cls)));
+            }
+            Ast::Class(ast::Class::Bracketed(ref ast)) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span,
+                            ErrorKind::EmptyClassNotAllowed,
+                        ));
+                    }
+                    let expr = Hir::class(hir::Class::Unicode(cls));
+                    self.push(HirFrame::Expr(expr));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span,
+                            ErrorKind::EmptyClassNotAllowed,
+                        ));
+                    }
+
+                    let expr = Hir::class(hir::Class::Bytes(cls));
+                    self.push(HirFrame::Expr(expr));
+                }
+            }
+            Ast::Repetition(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+            }
+            Ast::Group(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                let old_flags = self.pop().unwrap().unwrap_group();
+                self.trans().flags.set(old_flags);
+                self.push(HirFrame::Expr(self.hir_group(x, expr)));
+            }
+            Ast::Concat(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    if !expr.kind().is_empty() {
+                        exprs.push(expr);
+                    }
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::concat(exprs)));
+            }
+            Ast::Alternation(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::alternation(exprs)));
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(_) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            // We needn't handle the Union case here since the visitor will
+            // do it for us.
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_) => {}
+            ast::ClassSetItem::Literal(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let byte = self.class_literal_byte(x)?;
+                    cls.push(hir::ClassBytesRange::new(byte, byte));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Range(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let start = self.class_literal_byte(&x.start)?;
+                    let end = self.class_literal_byte(&x.end)?;
+                    cls.push(hir::ClassBytesRange::new(start, end));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Ascii(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.unicode_fold_and_negate(
+                        &x.span, x.negated, &mut cls,
+                    )?;
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
+                    }
+                    self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Unicode(ref x) => {
+                let xcls = self.hir_unicode_class(x)?;
+                let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                cls.union(&xcls);
+                self.push(HirFrame::ClassUnicode(cls));
+            }
+            ast::ClassSetItem::Perl(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_perl_unicode_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_perl_byte_class(x);
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Bracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassUnicode(cls2));
+                } else {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassBytes(cls2));
+                }
+            }
+            // This is handled automatically by the visitor.
+            ast::ClassSetItem::Union(_) => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        use ast::ClassSetBinaryOpKind::*;
+
+        if self.flags().unicode() {
+            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut cls = self.pop().unwrap().unwrap_class_unicode();
+            if self.flags().case_insensitive() {
+                rhs.try_case_fold_simple().map_err(|_| {
+                    self.error(
+                        op.rhs.span().clone(),
+                        ErrorKind::UnicodeCaseUnavailable,
+                    )
+                })?;
+                lhs.try_case_fold_simple().map_err(|_| {
+                    self.error(
+                        op.lhs.span().clone(),
+                        ErrorKind::UnicodeCaseUnavailable,
+                    )
+                })?;
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut cls = self.pop().unwrap().unwrap_class_bytes();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type is responsible for carrying around the original pattern string,
+/// which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+    trans: &'t Translator,
+    pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Build a new internal translator.
+    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
+        TranslatorI { trans: trans, pattern: pattern }
+    }
+
+    /// Return a reference to the underlying translator.
+    fn trans(&self) -> &Translator {
+        &self.trans
+    }
+
+    /// Push the given frame on to the call stack.
+    fn push(&self, frame: HirFrame) {
+        self.trans().stack.borrow_mut().push(frame);
+    }
+
+    /// Pop the top of the call stack. If the call stack is empty, return None.
+    fn pop(&self) -> Option<HirFrame> {
+        self.trans().stack.borrow_mut().pop()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ErrorKind) -> Error {
+        Error { kind: kind, pattern: self.pattern.to_string(), span: span }
+    }
+
+    /// Return a copy of the active flags.
+    fn flags(&self) -> Flags {
+        self.trans().flags.get()
+    }
+
+    /// Set the flags of this translator from the flags set in the given AST.
+    /// Then, return the old flags.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let old_flags = self.flags();
+        let mut new_flags = Flags::from_ast(ast_flags);
+        new_flags.merge(&old_flags);
+        self.trans().flags.set(new_flags);
+        old_flags
+    }
+
+    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
+        let ch = match self.literal_to_char(lit)? {
+            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
+            hir::Literal::Unicode(ch) => ch,
+        };
+        if self.flags().case_insensitive() {
+            self.hir_from_char_case_insensitive(lit.span, ch)
+        } else {
+            self.hir_from_char(lit.span, ch)
+        }
+    }
+
+    /// Convert an Ast literal to its scalar representation.
+    ///
+    /// When Unicode mode is enabled, then this always succeeds and returns a
+    /// `char` (Unicode scalar value).
+    ///
+    /// When Unicode mode is disabled, then a raw byte is returned. If that
+    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
+    /// an error.
+    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
+        if self.flags().unicode() {
+            return Ok(hir::Literal::Unicode(lit.c));
+        }
+        let byte = match lit.byte() {
+            None => return Ok(hir::Literal::Unicode(lit.c)),
+            Some(byte) => byte,
+        };
+        if byte <= 0x7F {
+            return Ok(hir::Literal::Unicode(byte as char));
+        }
+        if !self.trans().allow_invalid_utf8 {
+            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
+        }
+        Ok(hir::Literal::Byte(byte))
+    }
+
+    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
+        if !self.flags().unicode() && c.len_utf8() > 1 {
+            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+        }
+        Ok(Hir::literal(hir::Literal::Unicode(c)))
+    }
+
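+    // For example (see `literal_case_insensitive` in the tests below),
+    // `(?i)β` becomes the Unicode class ['Β', 'β', 'ϐ'], while `(?i-u)a`
+    // becomes the byte class [A, a].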
+    fn hir_from_char_case_insensitive(
+        &self,
+        span: Span,
+        c: char,
+    ) -> Result<Hir> {
+        if self.flags().unicode() {
+            // If case folding won't do anything, then don't bother trying.
+            let map =
+                unicode::contains_simple_case_mapping(c, c).map_err(|_| {
+                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
+                })?;
+            if !map {
+                return self.hir_from_char(span, c);
+            }
+            let mut cls =
+                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
+                    c, c,
+                )]);
+            cls.try_case_fold_simple().map_err(|_| {
+                self.error(span, ErrorKind::UnicodeCaseUnavailable)
+            })?;
+            Ok(Hir::class(hir::Class::Unicode(cls)))
+        } else {
+            if c.len_utf8() > 1 {
+                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+            }
+            // If case folding won't do anything, then don't bother trying.
+            match c {
+                'A'..='Z' | 'a'..='z' => {}
+                _ => return self.hir_from_char(span, c),
+            }
+            let mut cls =
+                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
+                    c as u8, c as u8,
+                )]);
+            cls.case_fold_simple();
+            Ok(Hir::class(hir::Class::Bytes(cls)))
+        }
+    }
+
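+    // `.` becomes a class matching any Unicode scalar value except `\n`;
+    // with `(?s)` it also matches `\n`, and with `(?-u)` it is byte
+    // oriented and requires `allow_invalid_utf8` (see the `dot` test below).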
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        if !unicode && !self.trans().allow_invalid_utf8 {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        Ok(if self.flags().dot_matches_new_line() {
+            Hir::any(!unicode)
+        } else {
+            Hir::dot(!unicode)
+        })
+    }
+
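+    // For example, `^` and `$` become text anchors by default and line
+    // anchors when the multi-line flag (`m`) is set, while `\A` and `\z`
+    // are always text anchors (see the `assertions` test below).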
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        let multi_line = self.flags().multi_line();
+        Ok(match asst.kind {
+            ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
+                hir::Anchor::StartLine
+            } else {
+                hir::Anchor::StartText
+            }),
+            ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
+                hir::Anchor::EndLine
+            } else {
+                hir::Anchor::EndText
+            }),
+            ast::AssertionKind::StartText => {
+                Hir::anchor(hir::Anchor::StartText)
+            }
+            ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
+            ast::AssertionKind::WordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::Unicode
+                } else {
+                    hir::WordBoundary::Ascii
+                })
+            }
+            ast::AssertionKind::NotWordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::UnicodeNegate
+                } else {
+                    // It is possible for negated ASCII word boundaries to
+                    // match at invalid UTF-8 boundaries, even when searching
+                    // valid UTF-8.
+                    if !self.trans().allow_invalid_utf8 {
+                        return Err(
+                            self.error(asst.span, ErrorKind::InvalidUtf8)
+                        );
+                    }
+                    hir::WordBoundary::AsciiNegate
+                })
+            }
+        })
+    }
+
+    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
+        let kind = match group.kind {
+            ast::GroupKind::CaptureIndex(idx) => {
+                hir::GroupKind::CaptureIndex(idx)
+            }
+            ast::GroupKind::CaptureName(ref capname) => {
+                hir::GroupKind::CaptureName {
+                    name: capname.name.clone(),
+                    index: capname.index,
+                }
+            }
+            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
+        };
+        Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
+    }
+
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        let kind = match rep.op.kind {
+            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
+            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
+            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
+            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
+                m,
+                n,
+            )) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
+            }
+        };
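+        // When the swap-greed flag (`U`) is set, greedy and lazy swap
+        // meaning: `a*` becomes lazy and `a*?` becomes greedy (see the
+        // `(?U)a*a*?` case in the `flags` test below).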
+        let greedy =
+            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
+        Hir::repetition(hir::Repetition {
+            kind: kind,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
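+    // The three query forms below correspond to, e.g., `\pL` (one-letter
+    // general category), `\p{Greek}` (binary property or script name) and
+    // `\p{sc=Greek}` (property name/value pair). This mapping is a summary,
+    // not an exhaustive description of the concrete syntax.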
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        use ast::ClassUnicodeKind::*;
+
+        if !self.flags().unicode() {
+            return Err(
+                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
+            );
+        }
+        let query = match ast_class.kind {
+            OneLetter(name) => ClassQuery::OneLetter(name),
+            Named(ref name) => ClassQuery::Binary(name),
+            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
+                property_name: name,
+                property_value: value,
+            },
+        };
+        let mut result = self.convert_unicode_class_error(
+            &ast_class.span,
+            unicode::class(query),
+        );
+        if let Ok(ref mut class) = result {
+            self.unicode_fold_and_negate(
+                &ast_class.span,
+                ast_class.negated,
+                class,
+            )?;
+        }
+        result
+    }
+
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassUnicode> {
+        use ast::ClassPerlKind::*;
+
+        assert!(self.flags().unicode());
+        let result = match ast_class.kind {
+            Digit => unicode::perl_digit(),
+            Space => unicode::perl_space(),
+            Word => unicode::perl_word(),
+        };
+        let mut class =
+            self.convert_unicode_class_error(&ast_class.span, result)?;
+        // We needn't apply case folding here because the Perl Unicode classes
+        // are already closed under Unicode simple case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        Ok(class)
+    }
+
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassBytes {
+        use ast::ClassPerlKind::*;
+
+        assert!(!self.flags().unicode());
+        let mut class = match ast_class.kind {
+            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
+            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
+            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
+        };
+        // We needn't apply case folding here because the Perl ASCII classes
+        // are already closed (under ASCII case folding).
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    /// Converts the given Unicode specific error to an HIR translation error.
+    ///
+    /// The span given should approximate the position at which an error would
+    /// occur.
+    fn convert_unicode_class_error(
+        &self,
+        span: &Span,
+        result: unicode::Result<hir::ClassUnicode>,
+    ) -> Result<hir::ClassUnicode> {
+        result.map_err(|err| {
+            let sp = span.clone();
+            match err {
+                unicode::Error::PropertyNotFound => {
+                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
+                }
+                unicode::Error::PropertyValueNotFound => {
+                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
+                }
+                unicode::Error::PerlClassNotFound => {
+                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
+                }
+            }
+        })
+    }
+
+    fn unicode_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.try_case_fold_simple().map_err(|_| {
+                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
+            })?;
+        }
+        if negated {
+            class.negate();
+        }
+        Ok(())
+    }
+
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
+            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
+        }
+        Ok(())
+    }
+
+    /// Return a scalar byte value suitable for use as a literal in a byte
+    /// character class.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        match self.literal_to_char(ast)? {
+            hir::Literal::Byte(byte) => Ok(byte),
+            hir::Literal::Unicode(ch) => {
+                if ch <= 0x7F as char {
+                    Ok(ch as u8)
+                } else {
+                    // We can't feasibly support Unicode in
+                    // byte oriented classes. Byte classes don't
+                    // do Unicode case folding.
+                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+                }
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled, or
+/// present but enabled.
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+    case_insensitive: Option<bool>,
+    multi_line: Option<bool>,
+    dot_matches_new_line: Option<bool>,
+    swap_greed: Option<bool>,
+    unicode: Option<bool>,
+    // Note that `ignore_whitespace` is omitted here because it is handled
+    // entirely in the parser.
+}
+
+impl Flags {
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        let mut enable = true;
+        for item in &ast.items {
+            match item.kind {
+                ast::FlagsItemKind::Negation => {
+                    enable = false;
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
+                    flags.case_insensitive = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
+                    flags.multi_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
+                    flags.dot_matches_new_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
+                    flags.swap_greed = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
+                    flags.unicode = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
+            }
+        }
+        flags
+    }
+
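+    // Worked example: inside a group like `(?i:...)` while `m` is already
+    // active, `from_ast` yields `case_insensitive = Some(true)` with all
+    // other flags `None`; merging with the previous flags then carries `m`
+    // through, so the group's effective flags are `i` and `m`.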
+    fn merge(&mut self, previous: &Flags) {
+        if self.case_insensitive.is_none() {
+            self.case_insensitive = previous.case_insensitive;
+        }
+        if self.multi_line.is_none() {
+            self.multi_line = previous.multi_line;
+        }
+        if self.dot_matches_new_line.is_none() {
+            self.dot_matches_new_line = previous.dot_matches_new_line;
+        }
+        if self.swap_greed.is_none() {
+            self.swap_greed = previous.swap_greed;
+        }
+        if self.unicode.is_none() {
+            self.unicode = previous.unicode;
+        }
+    }
+
+    fn case_insensitive(&self) -> bool {
+        self.case_insensitive.unwrap_or(false)
+    }
+
+    fn multi_line(&self) -> bool {
+        self.multi_line.unwrap_or(false)
+    }
+
+    fn dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line.unwrap_or(false)
+    }
+
+    fn swap_greed(&self) -> bool {
+        self.swap_greed.unwrap_or(false)
+    }
+
+    fn unicode(&self) -> bool {
+        self.unicode.unwrap_or(true)
+    }
+}
+
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    let ranges: Vec<_> = ascii_class(kind)
+        .iter()
+        .cloned()
+        .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
+        .collect();
+    hir::ClassBytes::new(ranges)
+}
+
+fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
+    use ast::ClassAsciiKind::*;
+    match *kind {
+        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
+        Alpha => &[('A', 'Z'), ('a', 'z')],
+        Ascii => &[('\x00', '\x7F')],
+        Blank => &[('\t', '\t'), (' ', ' ')],
+        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
+        Digit => &[('0', '9')],
+        Graph => &[('!', '~')],
+        Lower => &[('a', 'z')],
+        Print => &[(' ', '~')],
+        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
+        Space => &[
+            ('\t', '\t'),
+            ('\n', '\n'),
+            ('\x0B', '\x0B'),
+            ('\x0C', '\x0C'),
+            ('\r', '\r'),
+            (' ', ' '),
+        ],
+        Upper => &[('A', 'Z')],
+        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
+        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::parse::ParserBuilder;
+    use ast::{self, Ast, Position, Span};
+    use hir::{self, Hir, HirKind};
+    use unicode::{self, ClassQuery};
+
+    use super::{ascii_class, TranslatorBuilder};
+
+    // We create these errors to compare with real hir::Errors in the tests.
+    // We define equality between TestError and hir::Error to disregard the
+    // pattern string in hir::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        span: Span,
+        kind: hir::ErrorKind,
+    }
+
+    impl PartialEq<hir::Error> for TestError {
+        fn eq(&self, other: &hir::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    impl PartialEq<TestError> for hir::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    fn parse(pattern: &str) -> Ast {
+        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
+    }
+
+    fn t(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn t_err(pattern: &str) -> hir::Error {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap_err()
+    }
+
+    fn t_bytes(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(true)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn hir_lit(s: &str) -> Hir {
+        match s.len() {
+            0 => Hir::empty(),
+            _ => {
+                let lits = s
+                    .chars()
+                    .map(hir::Literal::Unicode)
+                    .map(Hir::literal)
+                    .collect();
+                Hir::concat(lits)
+            }
+        }
+    }
+
+    fn hir_blit(s: &[u8]) -> Hir {
+        match s.len() {
+            0 => Hir::empty(),
+            1 => Hir::literal(hir::Literal::Byte(s[0])),
+            _ => {
+                let lits = s
+                    .iter()
+                    .cloned()
+                    .map(hir::Literal::Byte)
+                    .map(Hir::literal)
+                    .collect();
+                Hir::concat(lits)
+            }
+        }
+    }
+
+    fn hir_group(i: u32, expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::CaptureIndex(i),
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::CaptureName {
+                name: name.to_string(),
+                index: i,
+            },
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_group_nocap(expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::NonCapturing,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrOne,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_star(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::OneOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::Range(range),
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_alt(alts: Vec<Hir>) -> Hir {
+        Hir::alternation(alts)
+    }
+
+    fn hir_cat(exprs: Vec<Hir>) -> Hir {
+        Hir::concat(exprs)
+    }
+
+    #[allow(dead_code)]
+    fn hir_uclass_query(query: ClassQuery) -> Hir {
+        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
+    }
+
+    #[allow(dead_code)]
+    fn hir_uclass_perl_word() -> Hir {
+        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
+    }
+
+    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+        let ranges: Vec<hir::ClassUnicodeRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+            .collect();
+        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
+    }
+
+    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+        let ranges: Vec<hir::ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
+            .collect();
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
+    }
+
+    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
+        let ranges: Vec<hir::ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| {
+                assert!(s as u32 <= 0x7F);
+                assert!(e as u32 <= 0x7F);
+                hir::ClassBytesRange::new(s as u8, e as u8)
+            })
+            .collect();
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
+    }
+
+    fn hir_case_fold(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.case_fold_simple();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot case fold non-class Hir expr"),
+        }
+    }
+
+    fn hir_negate(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.negate();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot negate non-class Hir expr"),
+        }
+    }
+
+    #[allow(dead_code)]
+    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot union non-class Hir exprs"),
+        }
+    }
+
+    #[allow(dead_code)]
+    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot difference non-class Hir exprs"),
+        }
+    }
+
+    fn hir_anchor(anchor: hir::Anchor) -> Hir {
+        Hir::anchor(anchor)
+    }
+
+    fn hir_word(wb: hir::WordBoundary) -> Hir {
+        Hir::word_boundary(wb)
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?i)"), Hir::empty());
+        assert_eq!(t("()"), hir_group(1, Hir::empty()));
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
+        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+        assert_eq!(
+            t("()|()"),
+            hir_alt(vec![
+                hir_group(1, Hir::empty()),
+                hir_group(2, Hir::empty()),
+            ])
+        );
+        assert_eq!(
+            t("(|b)"),
+            hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
+        );
+        assert_eq!(
+            t("(a|)"),
+            hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
+        );
+        assert_eq!(
+            t("(a||c)"),
+            hir_group(
+                1,
+                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
+            )
+        );
+        assert_eq!(
+            t("(||)"),
+            hir_group(
+                1,
+                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
+            )
+        );
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(t("a"), hir_lit("a"));
+        assert_eq!(t("(?-u)a"), hir_lit("a"));
+        assert_eq!(t("☃"), hir_lit("☃"));
+        assert_eq!(t("abcd"), hir_lit("abcd"));
+
+        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(
+            t_err("(?-u)☃"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(8, 1, 7)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"(?-u)\xFF"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn literal_case_insensitive() {
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i:a)"),
+            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_lit("a"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)ab@c"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_uclass(&[('B', 'B'), ('b', 'b')]),
+                hir_lit("@"),
+                hir_uclass(&[('C', 'C'), ('c', 'c')]),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)β"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+
+        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?-u)a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("a"),
+            ])
+        );
+        assert_eq!(
+            t("(?i-u)ab@c"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
+                hir_lit("@"),
+                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
+            ])
+        );
+
+        assert_eq!(
+            t_bytes("(?i-u)a"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes("(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(
+            t_err("(?i-u)β"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 8),
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn dot() {
+        assert_eq!(
+            t("."),
+            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
+        );
+        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
+        assert_eq!(
+            t_bytes("(?-u)."),
+            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
+        );
+        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+
+        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+        assert_eq!(
+            t_err("(?-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(6, 1, 7)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?s-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn assertions() {
+        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
+        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
+        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
+        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
+        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
+
+        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
+        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
+        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
+        assert_eq!(
+            t_bytes(r"(?-u)\B"),
+            hir_word(hir::WordBoundary::AsciiNegate)
+        );
+
+        assert_eq!(
+            t_err(r"(?-u)\B"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn group() {
+        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
+        assert_eq!(
+            t("(a)(b)"),
+            hir_cat(vec![
+                hir_group(1, hir_lit("a")),
+                hir_group(2, hir_lit("b")),
+            ])
+        );
+        assert_eq!(
+            t("(a)|(b)"),
+            hir_alt(vec![
+                hir_group(1, hir_lit("a")),
+                hir_group(2, hir_lit("b")),
+            ])
+        );
+        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
+        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
+        assert_eq!(
+            t("(?P<foo>a)(?P<bar>b)"),
+            hir_cat(vec![
+                hir_group_name(1, "foo", hir_lit("a")),
+                hir_group_name(2, "bar", hir_lit("b")),
+            ])
+        );
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
+        assert_eq!(
+            t("(?:a)(b)"),
+            hir_cat(vec![
+                hir_group_nocap(hir_lit("a")),
+                hir_group(1, hir_lit("b")),
+            ])
+        );
+        assert_eq!(
+            t("(a)(?:b)(c)"),
+            hir_cat(vec![
+                hir_group(1, hir_lit("a")),
+                hir_group_nocap(hir_lit("b")),
+                hir_group(2, hir_lit("c")),
+            ])
+        );
+        assert_eq!(
+            t("(a)(?P<foo>b)(c)"),
+            hir_cat(vec![
+                hir_group(1, hir_lit("a")),
+                hir_group_name(2, "foo", hir_lit("b")),
+                hir_group(3, hir_lit("c")),
+            ])
+        );
+        assert_eq!(t("()"), hir_group(1, Hir::empty()));
+        assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
+        assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
+        assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
+    }
+
+    #[test]
+    fn flags() {
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i:a)a"),
+            hir_cat(vec![
+                hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
+                hir_lit("a"),
+            ])
+        );
+        assert_eq!(
+            t("(?i-u:a)β"),
+            hir_cat(vec![
+                hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+                hir_lit("β"),
+            ])
+        );
+        assert_eq!(
+            t("(?:(?i-u)a)b"),
+            hir_cat(vec![
+                hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+                hir_lit("b"),
+            ])
+        );
+        assert_eq!(
+            t("((?i-u)a)b"),
+            hir_cat(vec![
+                hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+                hir_lit("b"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?-i:a)a"),
+            hir_cat(vec![
+                hir_group_nocap(hir_lit("a")),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_anchor(hir::Anchor::StartLine),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^(?i-m)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_anchor(hir::Anchor::StartLine),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_anchor(hir::Anchor::StartText),
+            ])
+        );
+        assert_eq!(
+            t("(?U)a*a*?(?-U)a*a*?"),
+            hir_cat(vec![
+                hir_star(false, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(false, hir_lit("a")),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?:a(?i)a)a"),
+            hir_cat(vec![
+                hir_group_nocap(hir_cat(vec![
+                    hir_lit("a"),
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                ])),
+                hir_lit("a"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?:a(?-i)a)a"),
+            hir_cat(vec![
+                hir_group_nocap(hir_cat(vec![
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                    hir_lit("a"),
+                ])),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            ])
+        );
+    }
+
+    #[test]
+    fn escape() {
+        assert_eq!(
+            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
+            hir_lit(r"\.+*?()|[]{}^$#")
+        );
+    }
+
+    #[test]
+    fn repetition() {
+        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
+        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
+        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
+        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
+        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
+        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
+
+        assert_eq!(
+            t("a{1}"),
+            hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
+        );
+        assert_eq!(
+            t("a{1,}"),
+            hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
+        );
+        assert_eq!(
+            t("a{1,2}"),
+            hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
+        );
+        assert_eq!(
+            t("a{1}?"),
+            hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
+        );
+        assert_eq!(
+            t("a{1,}?"),
+            hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
+        );
+        assert_eq!(
+            t("a{1,2}?"),
+            hir_range(
+                false,
+                hir::RepetitionRange::Bounded(1, 2),
+                hir_lit("a"),
+            )
+        );
+
+        assert_eq!(
+            t("ab?"),
+            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        );
+        assert_eq!(
+            t("(ab)?"),
+            hir_quest(
+                true,
+                hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
+            )
+        );
+        assert_eq!(
+            t("a|b?"),
+            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        );
+    }
+
+    #[test]
+    fn cat_alt() {
+        assert_eq!(
+            t("(ab)"),
+            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
+        );
+        assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
+        assert_eq!(
+            t("a|b|c"),
+            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
+        );
+        assert_eq!(
+            t("ab|bc|cd"),
+            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
+        );
+        assert_eq!(
+            t("(a|b)"),
+            hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
+        );
+        assert_eq!(
+            t("(a|b|c)"),
+            hir_group(
+                1,
+                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
+            )
+        );
+        assert_eq!(
+            t("(ab|bc|cd)"),
+            hir_group(
+                1,
+                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
+            )
+        );
+        assert_eq!(
+            t("(ab|(bc|(cd)))"),
+            hir_group(
+                1,
+                hir_alt(vec![
+                    hir_lit("ab"),
+                    hir_group(
+                        2,
+                        hir_alt(vec![
+                            hir_lit("bc"),
+                            hir_group(3, hir_lit("cd")),
+                        ])
+                    ),
+                ])
+            )
+        );
+    }
+
+    #[test]
+    fn class_ascii() {
+        assert_eq!(
+            t("[[:alnum:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
+        );
+        assert_eq!(
+            t("[[:alpha:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
+        );
+        assert_eq!(
+            t("[[:ascii:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
+        );
+        assert_eq!(
+            t("[[:blank:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
+        );
+        assert_eq!(
+            t("[[:cntrl:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
+        );
+        assert_eq!(
+            t("[[:digit:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t("[[:graph:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
+        );
+        assert_eq!(
+            t("[[:lower:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
+        );
+        assert_eq!(
+            t("[[:print:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
+        );
+        assert_eq!(
+            t("[[:punct:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
+        );
+        assert_eq!(
+            t("[[:space:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t("[[:upper:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
+        );
+        assert_eq!(
+            t("[[:word:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
+        );
+        assert_eq!(
+            t("[[:xdigit:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
+        );
+
+        assert_eq!(
+            t("[[:^lower:]]"),
+            hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[:lower:]]"),
+            hir_uclass(&[
+                ('A', 'Z'),
+                ('a', 'z'),
+                ('\u{17F}', '\u{17F}'),
+                ('\u{212A}', '\u{212A}'),
+            ])
+        );
+
+        assert_eq!(
+            t("(?-u)[[:lower:]]"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
+        );
+        assert_eq!(
+            t("(?i-u)[[:lower:]]"),
+            hir_case_fold(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Lower
+            )))
+        );
+
+        assert_eq!(
+            t_err("(?-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(16, 1, 17)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?i-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(7, 1, 8),
+                    Position::new(17, 1, 18)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-perl")]
+    fn class_perl() {
+        // Unicode
+        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
+        assert_eq!(t(r"\w"), hir_uclass_perl_word());
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\d"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\s"),
+            hir_uclass_query(ClassQuery::Binary("space"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
+
+        // Unicode, negated
+        assert_eq!(
+            t(r"\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(
+            t(r"\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
+
+        // ASCII only
+        assert_eq!(
+            t(r"(?-u)\d"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t(r"(?-u)\s"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t(r"(?-u)\w"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
+        );
+        assert_eq!(
+            t(r"(?i-u)\d"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t(r"(?i-u)\s"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t(r"(?i-u)\w"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
+        );
+
+        // ASCII only, negated
+        assert_eq!(
+            t(r"(?-u)\D"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit
+            )))
+        );
+        assert_eq!(
+            t(r"(?-u)\S"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Space
+            )))
+        );
+        assert_eq!(
+            t(r"(?-u)\W"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word
+            )))
+        );
+        assert_eq!(
+            t(r"(?i-u)\D"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit
+            )))
+        );
+        assert_eq!(
+            t(r"(?i-u)\S"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Space
+            )))
+        );
+        assert_eq!(
+            t(r"(?i-u)\W"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word
+            )))
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-perl"))]
+    fn class_perl_word_disabled() {
+        assert_eq!(
+            t_err(r"\w"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
+    fn class_perl_space_disabled() {
+        assert_eq!(
+            t_err(r"\s"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(all(
+        not(feature = "unicode-perl"),
+        not(feature = "unicode-gencat")
+    ))]
+    fn class_perl_digit_disabled() {
+        assert_eq!(
+            t_err(r"\d"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-gencat")]
+    fn class_unicode_gencat() {
+        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{se      PaRa ToR}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{gc:Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{gc=Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{Other}"),
+            hir_uclass_query(ClassQuery::Binary("Other"))
+        );
+        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
+
+        assert_eq!(
+            t(r"\PZ"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{gc!=separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+
+        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+        assert_eq!(
+            t(r"\p{gc:any}"),
+            hir_uclass_query(ClassQuery::Binary("Any"))
+        );
+        assert_eq!(
+            t(r"\p{gc:assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{gc:ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+
+        assert_eq!(
+            t_err(r"(?-u)\pZ"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(8, 1, 9)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"(?-u)\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(18, 1, 19)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\pE"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(3, 1, 4)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{gc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-gencat"))]
+    fn class_unicode_gencat_disabled() {
+        assert_eq!(
+            t_err(r"\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{Any}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-script")]
+    fn class_unicode_script() {
+        assert_eq!(
+            t(r"\p{Greek}"),
+            hir_uclass_query(ClassQuery::Binary("Greek"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\p{Greek}"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\P{Greek}"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "Greek"
+            ))))
+        );
+
+        assert_eq!(
+            t_err(r"\p{sc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{scx:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-script"))]
+    fn class_unicode_script_disabled() {
+        assert_eq!(
+            t_err(r"\p{Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{scx:Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-age")]
+    fn class_unicode_age() {
+        assert_eq!(
+            t_err(r"\p{age:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-age"))]
+    fn class_unicode_age_disabled() {
+        assert_eq!(
+            t_err(r"\p{age:3.0}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn class_bracketed() {
+        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
+        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\p{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\PZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\P{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(all(
+            feature = "unicode-case",
+            any(feature = "unicode-perl", feature = "unicode-gencat")
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\P{greek}]"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
+        );
+
+        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[k]"),
+            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[β]"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
+
+        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
+        assert_eq!(
+            t_bytes("(?-u)[^a]"),
+            hir_negate(hir_bclass(&[(b'a', b'a')]))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(
+            t(r"[^\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\pZ]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\p{separator}]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\p{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[\P{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+
+        // Test some weird cases.
+        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+        assert_eq!(
+            t_err("(?-u)[^a]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(
+            t_err(r"[^\s\S]"),
+            TestError {
+                kind: hir::ErrorKind::EmptyClassNotAllowed,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(
+            t_err(r"(?-u)[^\s\S]"),
+            TestError {
+                kind: hir::ErrorKind::EmptyClassNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(12, 1, 13)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn class_bracketed_union() {
+        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[a\pZb]"),
+            hir_union(
+                hir_uclass(&[('a', 'b')]),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"[\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::Binary("greek")),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
+                    hir_union(
+                        hir_uclass_query(ClassQuery::Binary("greek")),
+                        hir_uclass_query(ClassQuery::Binary("separator"))
+                    )
+                )
+            )
+        );
+
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )))
+        );
+    }
+
+    #[test]
+    fn class_bracketed_nested() {
+        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
+
+        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
+        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a-b[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
+        );
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[^a-b[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')])
+        );
+
+        assert_eq!(
+            t_err(r"[^a-c[^c]]"),
+            TestError {
+                kind: hir::ErrorKind::EmptyClassNotAllowed,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t_err(r"(?i)[^a-c[^c]]"),
+            TestError {
+                kind: hir::ErrorKind::EmptyClassNotAllowed,
+                span: Span::new(
+                    Position::new(4, 1, 5),
+                    Position::new(14, 1, 15)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn class_bracketed_intersect() {
+        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&b-c]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_uclass(&[('c', 'x')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[c-da-b&&a-d]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-d&&c-da-b]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+
+        assert_eq!(
+            t("(?i-u)[abc&&b-c]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[abc&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
+        );
+        assert_eq!(
+            t("(?i-u)[c-da-b&&a-d]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-d&&c-da-b]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+
+        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+        // `^` is also allowed to be unescaped after `&&`.
+        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+        // `]` needs to be escaped after `&&` since it's not at start of class.
+        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+        // Test precedence.
+        assert_eq!(
+            t(r"[a-w&&[^c-g]z]"),
+            hir_uclass(&[('a', 'b'), ('h', 'w')])
+        );
+    }
+
+    #[test]
+    fn class_bracketed_intersect_negate() {
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^\w&&\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[\w&&\d]]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[^\w&&\d]]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
+
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t_bytes(r"(?-u)[^\w&&\d]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit
+            )))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+            hir_negate(hir_bclass(&[(b'a', b'c')]))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[\w&&\d]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit
+            )))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[^\w&&\d]]"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word
+            )))
+        );
+    }
+
+    #[test]
+    fn class_bracketed_difference() {
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pL--[:ascii:]]"),
+            hir_difference(
+                hir_uclass_query(ClassQuery::Binary("letter")),
+                hir_uclass(&[('\0', '\x7F')])
+            )
+        );
+
+        assert_eq!(
+            t(r"(?-u)[[:alpha:]--[:lower:]]"),
+            hir_bclass(&[(b'A', b'Z')])
+        );
+    }
+
+    #[test]
+    fn class_bracketed_symmetric_difference() {
+        #[cfg(feature = "unicode-script")]
+        assert_eq!(
+            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+            hir_uclass(&[
+                ('\u{0342}', '\u{0342}'),
+                ('\u{0345}', '\u{0345}'),
+                ('\u{1DC0}', '\u{1DC1}'),
+            ])
+        );
+        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+        assert_eq!(
+            t(r"(?-u)[a-g~~c-j]"),
+            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
+        );
+    }
+
+    #[test]
+    fn ignore_whitespace() {
+        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+{ # comment
+    53 # comment
+} #comment"),
+            hir_lit("S")
+        );
+
+        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+        53 # comment"),
+            hir_lit("S")
+        );
+        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"(?x)\p # comment
+{ # comment
+    Separator # comment
+} # comment"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+
+        assert_eq!(
+            t(r"(?x)a # comment
+{ # comment
+    5 # comment
+    , # comment
+    10 # comment
+} # comment"),
+            hir_range(
+                true,
+                hir::RepetitionRange::Bounded(5, 10),
+                hir_lit("a")
+            )
+        );
+
+        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
+    }
+
+    #[test]
+    fn analysis_is_always_utf8() {
+        // Positive examples.
+        assert!(t_bytes(r"a").is_always_utf8());
+        assert!(t_bytes(r"ab").is_always_utf8());
+        assert!(t_bytes(r"(?-u)a").is_always_utf8());
+        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
+        assert!(t_bytes(r"\xFF").is_always_utf8());
+        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
+        assert!(t_bytes(r"[^a]").is_always_utf8());
+        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
+        assert!(t_bytes(r"\b").is_always_utf8());
+        assert!(t_bytes(r"\B").is_always_utf8());
+        assert!(t_bytes(r"(?-u)\b").is_always_utf8());
+
+        // Negative examples.
+        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
+    }
+
+    #[test]
+    fn analysis_is_all_assertions() {
+        // Positive examples.
+        assert!(t(r"\b").is_all_assertions());
+        assert!(t(r"\B").is_all_assertions());
+        assert!(t(r"^").is_all_assertions());
+        assert!(t(r"$").is_all_assertions());
+        assert!(t(r"\A").is_all_assertions());
+        assert!(t(r"\z").is_all_assertions());
+        assert!(t(r"$^\z\A\b\B").is_all_assertions());
+        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
+        assert!(t(r"^$|$^").is_all_assertions());
+        assert!(t(r"((\b)+())*^").is_all_assertions());
+
+        // Negative examples.
+        assert!(!t(r"^a").is_all_assertions());
+    }
+
+    #[test]
+    fn analysis_is_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_anchored_start());
+        assert!(t(r"$").is_anchored_end());
+        assert!(t(r"^").is_line_anchored_start());
+        assert!(t(r"$").is_line_anchored_end());
+
+        assert!(t(r"^^").is_anchored_start());
+        assert!(t(r"$$").is_anchored_end());
+        assert!(t(r"^^").is_line_anchored_start());
+        assert!(t(r"$$").is_line_anchored_end());
+
+        assert!(t(r"^$").is_anchored_start());
+        assert!(t(r"^$").is_anchored_end());
+        assert!(t(r"^$").is_line_anchored_start());
+        assert!(t(r"^$").is_line_anchored_end());
+
+        assert!(t(r"^foo").is_anchored_start());
+        assert!(t(r"foo$").is_anchored_end());
+        assert!(t(r"^foo").is_line_anchored_start());
+        assert!(t(r"foo$").is_line_anchored_end());
+
+        assert!(t(r"^foo|^bar").is_anchored_start());
+        assert!(t(r"foo$|bar$").is_anchored_end());
+        assert!(t(r"^foo|^bar").is_line_anchored_start());
+        assert!(t(r"foo$|bar$").is_line_anchored_end());
+
+        assert!(t(r"^(foo|bar)").is_anchored_start());
+        assert!(t(r"(foo|bar)$").is_anchored_end());
+        assert!(t(r"^(foo|bar)").is_line_anchored_start());
+        assert!(t(r"(foo|bar)$").is_line_anchored_end());
+
+        assert!(t(r"^+").is_anchored_start());
+        assert!(t(r"$+").is_anchored_end());
+        assert!(t(r"^+").is_line_anchored_start());
+        assert!(t(r"$+").is_line_anchored_end());
+        assert!(t(r"^++").is_anchored_start());
+        assert!(t(r"$++").is_anchored_end());
+        assert!(t(r"^++").is_line_anchored_start());
+        assert!(t(r"$++").is_line_anchored_end());
+        assert!(t(r"(^)+").is_anchored_start());
+        assert!(t(r"($)+").is_anchored_end());
+        assert!(t(r"(^)+").is_line_anchored_start());
+        assert!(t(r"($)+").is_line_anchored_end());
+
+        assert!(t(r"$^").is_anchored_start());
+        assert!(t(r"$^").is_anchored_start());
+        assert!(t(r"$^").is_line_anchored_end());
+        assert!(t(r"$^").is_line_anchored_end());
+        assert!(t(r"$^|^$").is_anchored_start());
+        assert!(t(r"$^|^$").is_anchored_end());
+        assert!(t(r"$^|^$").is_line_anchored_start());
+        assert!(t(r"$^|^$").is_line_anchored_end());
+
+        assert!(t(r"\b^").is_anchored_start());
+        assert!(t(r"$\b").is_anchored_end());
+        assert!(t(r"\b^").is_line_anchored_start());
+        assert!(t(r"$\b").is_line_anchored_end());
+        assert!(t(r"^(?m:^)").is_anchored_start());
+        assert!(t(r"(?m:$)$").is_anchored_end());
+        assert!(t(r"^(?m:^)").is_line_anchored_start());
+        assert!(t(r"(?m:$)$").is_line_anchored_end());
+        assert!(t(r"(?m:^)^").is_anchored_start());
+        assert!(t(r"$(?m:$)").is_anchored_end());
+        assert!(t(r"(?m:^)^").is_line_anchored_start());
+        assert!(t(r"$(?m:$)").is_line_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_anchored_start());
+        assert!(!t(r"(?m)$").is_anchored_end());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
+
+        assert!(!t(r"a^").is_anchored_start());
+        assert!(!t(r"$a").is_anchored_start());
+        assert!(!t(r"a^").is_line_anchored_start());
+        assert!(!t(r"$a").is_line_anchored_start());
+
+        assert!(!t(r"a^").is_anchored_end());
+        assert!(!t(r"$a").is_anchored_end());
+        assert!(!t(r"a^").is_line_anchored_end());
+        assert!(!t(r"$a").is_line_anchored_end());
+
+        assert!(!t(r"^foo|bar").is_anchored_start());
+        assert!(!t(r"foo|bar$").is_anchored_end());
+        assert!(!t(r"^foo|bar").is_line_anchored_start());
+        assert!(!t(r"foo|bar$").is_line_anchored_end());
+
+        assert!(!t(r"^*").is_anchored_start());
+        assert!(!t(r"$*").is_anchored_end());
+        assert!(!t(r"^*").is_line_anchored_start());
+        assert!(!t(r"$*").is_line_anchored_end());
+        assert!(!t(r"^*+").is_anchored_start());
+        assert!(!t(r"$*+").is_anchored_end());
+        assert!(!t(r"^*+").is_line_anchored_start());
+        assert!(!t(r"$*+").is_line_anchored_end());
+        assert!(!t(r"^+*").is_anchored_start());
+        assert!(!t(r"$+*").is_anchored_end());
+        assert!(!t(r"^+*").is_line_anchored_start());
+        assert!(!t(r"$+*").is_line_anchored_end());
+        assert!(!t(r"(^)*").is_anchored_start());
+        assert!(!t(r"($)*").is_anchored_end());
+        assert!(!t(r"(^)*").is_line_anchored_start());
+        assert!(!t(r"($)*").is_line_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_line_anchored() {
+        assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
+        assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
+
+        assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
+        assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
+
+        assert!(t(r"(?m)^").is_line_anchored_start());
+        assert!(t(r"(?m)$").is_line_anchored_end());
+
+        assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
+        assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
+
+        assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
+        assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_any_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_any_anchored_start());
+        assert!(t(r"$").is_any_anchored_end());
+        assert!(t(r"\A").is_any_anchored_start());
+        assert!(t(r"\z").is_any_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_any_anchored_start());
+        assert!(!t(r"(?m)$").is_any_anchored_end());
+        assert!(!t(r"$").is_any_anchored_start());
+        assert!(!t(r"^").is_any_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_match_empty() {
+        // Positive examples.
+        assert!(t(r"").is_match_empty());
+        assert!(t(r"()").is_match_empty());
+        assert!(t(r"()*").is_match_empty());
+        assert!(t(r"()+").is_match_empty());
+        assert!(t(r"()?").is_match_empty());
+        assert!(t(r"a*").is_match_empty());
+        assert!(t(r"a?").is_match_empty());
+        assert!(t(r"a{0}").is_match_empty());
+        assert!(t(r"a{0,}").is_match_empty());
+        assert!(t(r"a{0,1}").is_match_empty());
+        assert!(t(r"a{0,10}").is_match_empty());
+        #[cfg(feature = "unicode-gencat")]
+        assert!(t(r"\pL*").is_match_empty());
+        assert!(t(r"a*|b").is_match_empty());
+        assert!(t(r"b|a*").is_match_empty());
+        assert!(t(r"a*a?(abcd)*").is_match_empty());
+        assert!(t(r"^").is_match_empty());
+        assert!(t(r"$").is_match_empty());
+        assert!(t(r"(?m)^").is_match_empty());
+        assert!(t(r"(?m)$").is_match_empty());
+        assert!(t(r"\A").is_match_empty());
+        assert!(t(r"\z").is_match_empty());
+        assert!(t(r"\B").is_match_empty());
+        assert!(t_bytes(r"(?-u)\B").is_match_empty());
+
+        // Negative examples.
+        assert!(!t(r"a+").is_match_empty());
+        assert!(!t(r"a{1}").is_match_empty());
+        assert!(!t(r"a{1,}").is_match_empty());
+        assert!(!t(r"a{1,2}").is_match_empty());
+        assert!(!t(r"a{1,10}").is_match_empty());
+        assert!(!t(r"b|a").is_match_empty());
+        assert!(!t(r"a*a+(abcd)*").is_match_empty());
+        assert!(!t(r"\b").is_match_empty());
+        assert!(!t(r"(?-u)\b").is_match_empty());
+    }
+
+    #[test]
+    fn analysis_is_literal() {
+        // Positive examples.
+        assert!(t(r"").is_literal());
+        assert!(t(r"a").is_literal());
+        assert!(t(r"ab").is_literal());
+        assert!(t(r"abc").is_literal());
+        assert!(t(r"(?m)abc").is_literal());
+
+        // Negative examples.
+        assert!(!t(r"^").is_literal());
+        assert!(!t(r"a|b").is_literal());
+        assert!(!t(r"(a)").is_literal());
+        assert!(!t(r"a+").is_literal());
+        assert!(!t(r"foo(a)").is_literal());
+        assert!(!t(r"(a)foo").is_literal());
+        assert!(!t(r"[a]").is_literal());
+    }
+
+    #[test]
+    fn analysis_is_alternation_literal() {
+        // Positive examples.
+        assert!(t(r"").is_alternation_literal());
+        assert!(t(r"a").is_alternation_literal());
+        assert!(t(r"ab").is_alternation_literal());
+        assert!(t(r"abc").is_alternation_literal());
+        assert!(t(r"(?m)abc").is_alternation_literal());
+        assert!(t(r"a|b").is_alternation_literal());
+        assert!(t(r"a|b|c").is_alternation_literal());
+        assert!(t(r"foo|bar").is_alternation_literal());
+        assert!(t(r"foo|bar|baz").is_alternation_literal());
+
+        // Negative examples.
+        assert!(!t(r"^").is_alternation_literal());
+        assert!(!t(r"(a)").is_alternation_literal());
+        assert!(!t(r"a+").is_alternation_literal());
+        assert!(!t(r"foo(a)").is_alternation_literal());
+        assert!(!t(r"(a)foo").is_alternation_literal());
+        assert!(!t(r"[a]").is_alternation_literal());
+        assert!(!t(r"[a]|b").is_alternation_literal());
+        assert!(!t(r"a|[b]").is_alternation_literal());
+        assert!(!t(r"(a)|b").is_alternation_literal());
+        assert!(!t(r"a|(b)").is_alternation_literal());
+    }
+}
diff --git a/src/hir/visitor.rs b/src/hir/visitor.rs
new file mode 100644
index 0000000..81a9e98
--- /dev/null
+++ b/src/hir/visitor.rs
@@ -0,0 +1,203 @@
+use hir::{self, Hir, HirKind};
+
+/// A trait for visiting the high-level IR (HIR) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on a high-level intermediate representation of a regular
+/// expression without necessarily using recursion. In particular, this permits
+/// callers to do case analysis with constant stack usage, which can be
+/// important since the size of an HIR may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+pub trait Visitor {
+    /// The result of visiting an HIR.
+    type Output;
+    /// An error that visiting an HIR might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the HIR or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the HIR.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Hir` before descending into child `Hir`
+    /// nodes.
+    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Hir` after descending all of its child
+    /// `Hir` nodes.
+    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an alternation.
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Hir` while calling
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this function is when one wants to perform case
+/// analysis over an `Hir` without using a stack size proportional to the depth
+/// of the `Hir`. Namely, this function will instead use constant stack space,
+/// but will use heap space proportional to the size of the `Hir`. This may be
+/// desirable in cases where the size of `Hir` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
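+///
+/// # Example
+///
+/// A minimal sketch of a visitor that counts the nodes in an `Hir`. The
+/// `NodeCounter` name and the pattern below are illustrative only and not
+/// part of this crate's API.
+///
+/// ```
+/// use regex_syntax::Parser;
+/// use regex_syntax::hir::{visit, Hir, Visitor};
+///
+/// struct NodeCounter(usize);
+///
+/// impl Visitor for NodeCounter {
+///     type Output = usize;
+///     type Err = ();
+///
+///     fn finish(self) -> Result<usize, ()> {
+///         Ok(self.0)
+///     }
+///
+///     // Count every node as we enter it.
+///     fn visit_pre(&mut self, _hir: &Hir) -> Result<(), ()> {
+///         self.0 += 1;
+///         Ok(())
+///     }
+/// }
+///
+/// let hir = Parser::new().parse(r"a|(b)+").unwrap();
+/// let count = visit(&hir, NodeCounter(0)).unwrap();
+/// assert!(count > 1);
+/// ```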
+pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(hir, visitor)
+}
+
+/// HeapVisitor visits every item in an `Hir` without recursion, using constant
+/// stack space and heap space proportional to the size of the `Hir`.
+struct HeapVisitor<'a> {
+    /// A stack of `Hir` nodes. This is roughly analogous to the call stack
+    /// used in a typical recursive visitor.
+    stack: Vec<(&'a Hir, Frame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Hir`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a hir::Repetition),
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a hir::Group),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Hir],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Hir],
+    },
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut hir: &'a Hir,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+
+        visitor.start();
+        loop {
+            visitor.visit_pre(hir)?;
+            if let Some(x) = self.induct(hir) {
+                let child = x.child();
+                self.stack.push((hir, x));
+                hir = child;
+                continue;
+            }
+            // No induction means we have a base case, so we can post visit
+            // it now.
+            visitor.visit_post(hir)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_hir, frame) = match self.stack.pop() {
+                    None => return visitor.finish(),
+                    Some((post_hir, frame)) => (post_hir, frame),
+                };
+                // If this is a concat/alternate, then we might have additional
+                // inductive steps to process.
+                if let Some(x) = self.pop(frame) {
+                    if let Frame::Alternation { .. } = x {
+                        visitor.visit_alternation_in()?;
+                    }
+                    hir = x.child();
+                    self.stack.push((post_hir, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this HIR, so we can post visit it now.
+                visitor.visit_post(post_hir)?;
+            }
+        }
+    }
+
+    /// Build a stack frame for the given HIR if one is needed (which occurs if
+    /// and only if there are child nodes in the HIR). Otherwise, return None.
+    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
+        match *hir.kind() {
+            HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
+            HirKind::Group(ref x) => Some(Frame::Group(x)),
+            HirKind::Concat(ref x) if x.is_empty() => None,
+            HirKind::Concat(ref x) => {
+                Some(Frame::Concat { head: &x[0], tail: &x[1..] })
+            }
+            HirKind::Alternation(ref x) if x.is_empty() => None,
+            HirKind::Alternation(ref x) => {
+                Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
+            }
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None,
+            Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child HIR node to visit.
+    fn child(&self) -> &'a Hir {
+        match *self {
+            Frame::Repetition(rep) => &rep.hir,
+            Frame::Group(group) => &group.hir,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..e9f59d8
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,310 @@
+/*!
+This crate provides a robust regular expression parser.
+
+This crate defines two primary types:
+
+* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression.
+  An abstract syntax corresponds to a *structured representation* of the
+  concrete syntax of a regular expression, where the concrete syntax is the
+  pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
+  can be converted back to the original concrete syntax (modulo some details,
+  like whitespace). To a first approximation, the abstract syntax is complex
+  and difficult to analyze.
+* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation
+  ("HIR" or "high-level IR" for short) of regular expression. It corresponds to
+  an intermediate state of a regular expression that sits between the abstract
+  syntax and the low level compiled opcodes that are eventually responsible for
+  executing a regular expression search. Given some high-level IR, it is not
+  possible to produce the original concrete syntax (although it is possible to
+  produce an equivalent concrete syntax, it will likely scarcely resemble
+  the original pattern). To a first approximation, the high-level IR is simple
+  and easy to analyze.
+
+These two types come with conversion routines:
+
+* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete
+  syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html).
+* A [`hir::translate::Translator`](hir/translate/struct.Translator.html)
+  converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html).
+
+As a convenience, the above two conversion routines are combined into one via
+the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first
+convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
+
+
+# Example
+
+This example shows how to parse a pattern string into its HIR:
+
+```
+use regex_syntax::Parser;
+use regex_syntax::hir::{self, Hir};
+
+let hir = Parser::new().parse("a|b").unwrap();
+assert_eq!(hir, Hir::alternation(vec![
+    Hir::literal(hir::Literal::Unicode('a')),
+    Hir::literal(hir::Literal::Unicode('b')),
+]));
+```
+
+
+# Concrete syntax supported
+
+The concrete syntax is documented as part of the public API of the
+[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax).
+
+
+# Input safety
+
+A key feature of this library is that it is safe to use with end user facing
+input. This plays a significant role in the internal implementation. In
+particular:
+
+1. Parsers provide a `nest_limit` option that permits callers to control how
+   deeply nested a regular expression is allowed to be. This makes it possible
+   to do case analysis over an `Ast` or an `Hir` using recursion without
+   worrying about stack overflow.
+2. Since relying on a particular stack size is brittle, this crate goes to
+   great lengths to ensure that all interactions with both the `Ast` and the
+   `Hir` do not use recursion. Namely, they use constant stack space and heap
+   space proportional to the size of the original pattern string (in bytes).
+   This includes the type's corresponding destructors. (One exception to this
+   is literal extraction, but this will eventually get fixed.)
+
+
+# Error reporting
+
+The `Display` implementations on all `Error` types exposed in this library
+provide nice human readable errors that are suitable for showing to end users
+in a monospace font.
+
+
+# Literal extraction
+
+This crate provides limited support for
+[literal extraction from `Hir` values](hir/literal/struct.Literals.html).
+Be warned that literal extraction currently uses recursion, and therefore,
+stack size proportional to the size of the `Hir`.
+
+The purpose of literal extraction is to speed up searches. That is, if you
+know a regular expression must match a prefix or suffix literal, then it is
+often quicker to search for instances of that literal, and then confirm or deny
+the match using the full regular expression engine. These optimizations are
+done automatically in the `regex` crate.
+
+
+# Crate features
+
+An important feature provided by this crate is its Unicode support. This
+includes things like case folding, boolean properties, general categories,
+scripts and Unicode-aware support for the Perl classes `\w`, `\s` and `\d`.
+However, a downside of this support is that it requires bundling several
+Unicode data tables that are substantial in size.
+
+A fair number of use cases do not require full Unicode support. For this
+reason, this crate exposes a number of features to control which Unicode
+data is available.
+
+If a regular expression attempts to use a Unicode feature that is not available
+because the corresponding crate feature was disabled, then translating that
+regular expression to an `Hir` will return an error. (It is still possible to
+construct an `Ast` for such a regular expression, since Unicode data is not
+used until translation to an `Hir`.) Stated differently, enabling or disabling
+any of the features below can only add or subtract from the total set of valid
+regular expressions. Enabling or disabling a feature will never modify the
+match semantics of a regular expression.
+
+The following features are available:
+
+* **unicode** -
+  Enables all Unicode features. This feature is enabled by default, and will
+  always cover all Unicode features, even if more are added in the future.
+* **unicode-age** -
+  Provide the data for the
+  [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age).
+  This makes it possible to use classes like `\p{Age:6.0}` to refer to all
+  codepoints first introduced in Unicode 6.0.
+* **unicode-bool** -
+  Provide the data for numerous Unicode boolean properties. The full list
+  is not included here, but contains properties like `Alphabetic`, `Emoji`,
+  `Lowercase`, `Math`, `Uppercase` and `White_Space`.
+* **unicode-case** -
+  Provide the data for case insensitive matching using
+  [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
+* **unicode-gencat** -
+  Provide the data for
+  [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+  This includes, but is not limited to, `Decimal_Number`, `Letter`,
+  `Math_Symbol`, `Number` and `Punctuation`.
+* **unicode-perl** -
+  Provide the data for supporting the Unicode-aware Perl character classes,
+  corresponding to `\w`, `\s` and `\d`. This is also necessary for using
+  Unicode-aware word boundary assertions. Note that if this feature is
+  disabled, the `\s` and `\d` character classes are still available if the
+  `unicode-bool` and `unicode-gencat` features are enabled, respectively.
+* **unicode-script** -
+  Provide the data for
+  [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/).
+  This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`,
+  `Latin` and `Thai`.
+* **unicode-segment** -
+  Provide the data for the properties used to implement the
+  [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
+  This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
+  `\p{sb=ATerm}`.
+*/
+
+#![deny(missing_docs)]
+#![forbid(unsafe_code)]
+
+pub use error::{Error, Result};
+pub use parser::{Parser, ParserBuilder};
+pub use unicode::UnicodeWordError;
+
+pub mod ast;
+mod either;
+mod error;
+pub mod hir;
+mod parser;
+mod unicode;
+mod unicode_tables;
+pub mod utf8;
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
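+///
+/// # Example
+///
+/// A minimal sketch of its behavior:
+///
+/// ```
+/// use regex_syntax::escape;
+///
+/// // '.' and '+' are meta characters, so they get a backslash prefix.
+/// assert_eq!(escape("a.b+c"), r"a\.b\+c");
+/// ```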
+pub fn escape(text: &str) -> String {
+    let mut quoted = String::with_capacity(text.len());
+    escape_into(text, &mut quoted);
+    quoted
+}
+
+/// Escapes all meta characters in `text` and writes the result into `buf`.
+///
+/// This will append escape characters into the given buffer. The characters
+/// that are appended are safe to use as a literal in a regular expression.
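+///
+/// # Example
+///
+/// A minimal sketch, appending an escaped literal into an existing buffer:
+///
+/// ```
+/// use regex_syntax::escape_into;
+///
+/// let mut buf = String::from("^");
+/// escape_into("a+b", &mut buf);
+/// buf.push('$');
+/// assert_eq!(buf, r"^a\+b$");
+/// ```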
+pub fn escape_into(text: &str, buf: &mut String) {
+    for c in text.chars() {
+        if is_meta_character(c) {
+            buf.push('\\');
+        }
+        buf.push(c);
+    }
+}
+
+/// Returns true if the given character has significance in a regex.
+///
+/// These are the only characters that are allowed to be escaped, with one
+/// exception: an ASCII space character may be escaped when extended mode (with
+/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
+/// `false`.
+///
+/// Note that the set of characters for which this function returns `true` or
+/// `false` is fixed and won't change in a semver compatible release.
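+///
+/// # Example
+///
+/// A minimal sketch of its behavior:
+///
+/// ```
+/// use regex_syntax::is_meta_character;
+///
+/// assert!(is_meta_character('?'));
+/// assert!(is_meta_character('('));
+/// assert!(!is_meta_character(' '));
+/// assert!(!is_meta_character('a'));
+/// ```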
+pub fn is_meta_character(c: char) -> bool {
+    match c {
+        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{'
+        | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
+        _ => false,
+    }
+}
+
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+///
+/// # Panics
+///
+/// If the `unicode-perl` feature is not enabled, then this function panics.
+/// For this reason, it is recommended that callers use
+/// [`try_is_word_character`](fn.try_is_word_character.html)
+/// instead.
+pub fn is_word_character(c: char) -> bool {
+    try_is_word_character(c).expect("unicode-perl feature must be enabled")
+}
+
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+///
+/// # Errors
+///
+/// If the `unicode-perl` feature is not enabled, then this function always
+/// returns an error.
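+///
+/// # Example
+///
+/// A minimal sketch, assuming the default `unicode-perl` feature is enabled:
+///
+/// ```
+/// use regex_syntax::try_is_word_character;
+///
+/// // 'β' is Alphabetic, so it is a word character; '-' is not.
+/// assert_eq!(try_is_word_character('β').ok(), Some(true));
+/// assert_eq!(try_is_word_character('-').ok(), Some(false));
+/// ```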
+pub fn try_is_word_character(
+    c: char,
+) -> std::result::Result<bool, UnicodeWordError> {
+    unicode::is_word_character(c)
+}
+
+/// Returns true if and only if the given character is an ASCII word character.
+///
+/// An ASCII word character is defined by the following character class:
+/// `[_0-9a-zA-Z]`.
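+///
+/// # Example
+///
+/// A minimal sketch of its behavior:
+///
+/// ```
+/// use regex_syntax::is_word_byte;
+///
+/// assert!(is_word_byte(b'a'));
+/// assert!(is_word_byte(b'_'));
+/// assert!(!is_word_byte(b' '));
+/// ```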
+pub fn is_word_byte(c: u8) -> bool {
+    match c {
+        b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn escape_meta() {
+        assert_eq!(
+            escape(r"\.+*?()|[]{}^$#&-~"),
+            r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string()
+        );
+    }
+
+    #[test]
+    fn word_byte() {
+        assert!(is_word_byte(b'a'));
+        assert!(!is_word_byte(b'-'));
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-perl")]
+    fn word_char() {
+        assert!(is_word_character('a'), "ASCII");
+        assert!(is_word_character('à'), "Latin-1");
+        assert!(is_word_character('β'), "Greek");
+        assert!(is_word_character('\u{11011}'), "Brahmi (Unicode 6.0)");
+        assert!(is_word_character('\u{11611}'), "Modi (Unicode 7.0)");
+        assert!(is_word_character('\u{11711}'), "Ahom (Unicode 8.0)");
+        assert!(is_word_character('\u{17828}'), "Tangut (Unicode 9.0)");
+        assert!(is_word_character('\u{1B1B1}'), "Nushu (Unicode 10.0)");
+        assert!(is_word_character('\u{16E40}'), "Medefaidrin (Unicode 11.0)");
+        assert!(!is_word_character('-'));
+        assert!(!is_word_character('☃'));
+    }
+
+    #[test]
+    #[should_panic]
+    #[cfg(not(feature = "unicode-perl"))]
+    fn word_char_disabled_panic() {
+        assert!(is_word_character('a'));
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-perl"))]
+    fn word_char_disabled_error() {
+        assert!(try_is_word_character('a').is_err());
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..00f1391
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,200 @@
+use ast;
+use hir;
+
+use Result;
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
+///
+/// This type combines the builder options for both the
+/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
+/// and the
+/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+#[derive(Clone, Debug, Default)]
+pub struct ParserBuilder {
+    ast: ast::parse::ParserBuilder,
+    hir: hir::translate::TranslatorBuilder,
+}
+
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder::default()
+    }
+
+    /// Build a parser from this configuration with the given pattern.
+    pub fn build(&self) -> Parser {
+        Parser { ast: self.ast.build(), hir: self.hir.build() }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax; therefore, it should not be
+    /// used in a granular way.
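+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; the exact nesting depth of a pattern is an
+    /// implementation detail, so only a clearly-too-deep pattern is shown
+    /// failing:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// // A flat pattern is fine with a small nest limit...
+    /// assert!(ParserBuilder::new().nest_limit(5).build().parse("a").is_ok());
+    ///
+    /// // ...but 50 nested groups exceed it.
+    /// let deep = format!("{}a{}", "(".repeat(50), ")".repeat(50));
+    /// assert!(ParserBuilder::new().nest_limit(5).build().parse(&deep).is_err());
+    /// ```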
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.ast.nest_limit(limit);
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
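+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the behavior described above:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// // With octal syntax enabled, `\141` is another way to write `a`.
+    /// assert!(ParserBuilder::new().octal(true).build().parse(r"\141").is_ok());
+    ///
+    /// // With it disabled (the default), the same pattern is rejected, with
+    /// // an error mentioning that backreferences are unsupported.
+    /// assert!(ParserBuilder::new().build().parse(r"\141").is_err());
+    /// ```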
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.octal(yes);
+        self
+    }
+
+    /// When enabled, the parser will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the parser is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// parser will return an error).
+    ///
+    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
+    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
+    /// the parser to return an error. Namely, a negated ASCII word boundary
+    /// can result in matching positions that aren't valid UTF-8 boundaries.
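+    ///
+    /// # Example
+    ///
+    /// A minimal sketch using the negated ASCII word boundary mentioned
+    /// above:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// // Rejected by default, since it can match at positions that are not
+    /// // valid UTF-8 boundaries.
+    /// assert!(ParserBuilder::new().build().parse(r"(?-u:\B)").is_err());
+    ///
+    /// // Accepted once expressions matching invalid UTF-8 are permitted.
+    /// assert!(ParserBuilder::new()
+    ///     .allow_invalid_utf8(true)
+    ///     .build()
+    ///     .parse(r"(?-u:\B)")
+    ///     .is_ok());
+    /// ```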
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.allow_invalid_utf8(yes);
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
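+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the behavior described above:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// let mut verbose = ParserBuilder::new().ignore_whitespace(true).build();
+    /// let mut plain = ParserBuilder::new().build();
+    ///
+    /// // In verbose mode, spaces and trailing comments are ignored, so the
+    /// // two patterns below translate to the same HIR.
+    /// assert_eq!(
+    ///     verbose.parse("a b # a comment").unwrap(),
+    ///     plain.parse("ab").unwrap(),
+    /// );
+    /// ```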
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.ignore_whitespace(yes);
+        self
+    }
+
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.case_insensitive(yes);
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.multi_line(yes);
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.dot_matches_new_line(yes);
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.swap_greed(yes);
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
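+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of that interaction:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// // With Unicode mode disabled, `.` may match arbitrary bytes, so the
+    /// // pattern is rejected unless invalid UTF-8 is explicitly allowed.
+    /// assert!(ParserBuilder::new().unicode(false).build().parse(".").is_err());
+    /// assert!(ParserBuilder::new()
+    ///     .unicode(false)
+    ///     .allow_invalid_utf8(true)
+    ///     .build()
+    ///     .parse(".")
+    ///     .is_ok());
+    /// ```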
+    pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.unicode(yes);
+        self
+    }
+}
+
+/// A convenience parser for regular expressions.
+///
+/// This parser takes as input a regular expression pattern string (the
+/// "concrete syntax") and returns a high-level intermediate representation
+/// (the HIR) suitable for most types of analysis. In particular, this parser
+/// hides the intermediate state of producing an AST (the "abstract syntax").
+/// The AST is itself far more complex than the HIR, so this parser serves as a
+/// convenience for never having to deal with it at all.
+///
+/// If callers have more fine grained use cases that need an AST, then please
+/// see the [`ast::parse`](ast/parse/index.html) module.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    ast: ast::parse::Parser,
+    hir: hir::translate::Translator,
+}
+
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with the `parse` method, which returns
+    /// a high level intermediate representation of the given regular
+    /// expression.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into a high level intermediate
+    /// representation.
+    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+        let ast = self.ast.parse(pattern)?;
+        let hir = self.hir.translate(pattern, &ast)?;
+        Ok(hir)
+    }
+}
diff --git a/src/unicode.rs b/src/unicode.rs
new file mode 100644
index 0000000..7e41439
--- /dev/null
+++ b/src/unicode.rs
@@ -0,0 +1,991 @@
+use std::error;
+use std::fmt;
+use std::result;
+
+use hir;
+
+/// A type alias for errors specific to Unicode handling of classes.
+pub type Result<T> = result::Result<T, Error>;
+
+/// An inclusive range of codepoints from a generated file (hence the static
+/// lifetime).
+type Range = &'static [(char, char)];
+
+/// An error that occurs when dealing with Unicode.
+///
+/// We don't impl the Error trait here because these always get converted
+/// into other public errors. (This error type isn't exported.)
+#[derive(Debug)]
+pub enum Error {
+    PropertyNotFound,
+    PropertyValueNotFound,
+    // Not used when unicode-perl is enabled.
+    #[allow(dead_code)]
+    PerlClassNotFound,
+}
+
+/// A type alias for errors specific to Unicode case folding.
+pub type FoldResult<T> = result::Result<T, CaseFoldError>;
+
+/// An error that occurs when Unicode-aware simple case folding fails.
+///
+/// This error can occur when the case mapping tables necessary for Unicode
+/// aware case folding are unavailable. This only occurs when the
+/// `unicode-case` feature is disabled. (The feature is enabled by default.)
+#[derive(Debug)]
+pub struct CaseFoldError(());
+
+impl error::Error for CaseFoldError {}
+
+impl fmt::Display for CaseFoldError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Unicode-aware case folding is not available \
+             (probably because the unicode-case feature is not enabled)"
+        )
+    }
+}
+
+/// An error that occurs when the Unicode-aware `\w` class is unavailable.
+///
+/// This error can occur when the data tables necessary for the Unicode aware
+/// Perl character class `\w` are unavailable. This only occurs when the
+/// `unicode-perl` feature is disabled. (The feature is enabled by default.)
+#[derive(Debug)]
+pub struct UnicodeWordError(());
+
+impl error::Error for UnicodeWordError {}
+
+impl fmt::Display for UnicodeWordError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Unicode-aware \\w class is not available \
+             (probably because the unicode-perl feature is not enabled)"
+        )
+    }
+}
+
+/// Return an iterator over the equivalence class of simple case mappings
+/// for the given codepoint. The equivalence class does not include the
+/// given codepoint.
+///
+/// If the equivalence class is empty, then this returns the next scalar
+/// value that has a non-empty equivalence class, if it exists. If no such
+/// scalar value exists, then `None` is returned. The point of this behavior
+/// is to permit callers to avoid calling `simple_fold` more than they need
+/// to, since there is some cost to fetching the equivalence class.
+///
+/// This returns an error if the Unicode case folding tables are not available.
+pub fn simple_fold(
+    c: char,
+) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>> {
+    #[cfg(not(feature = "unicode-case"))]
+    fn imp(
+        _: char,
+    ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
+    {
+        use std::option::IntoIter;
+        Err::<result::Result<IntoIter<char>, _>, _>(CaseFoldError(()))
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn imp(
+        c: char,
+    ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
+    {
+        use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+
+        Ok(CASE_FOLDING_SIMPLE
+            .binary_search_by_key(&c, |&(c1, _)| c1)
+            .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().map(|&c| c))
+            .map_err(|i| {
+                if i >= CASE_FOLDING_SIMPLE.len() {
+                    None
+                } else {
+                    Some(CASE_FOLDING_SIMPLE[i].0)
+                }
+            }))
+    }
+
+    imp(c)
+}
+
+/// Returns true if and only if the given (inclusive) range contains at least
+/// one Unicode scalar value that has a non-empty non-trivial simple case
+/// mapping.
+///
+/// This function panics if `end < start`.
+///
+/// This returns an error if the Unicode case folding tables are not available.
+pub fn contains_simple_case_mapping(
+    start: char,
+    end: char,
+) -> FoldResult<bool> {
+    #[cfg(not(feature = "unicode-case"))]
+    fn imp(_: char, _: char) -> FoldResult<bool> {
+        Err(CaseFoldError(()))
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn imp(start: char, end: char) -> FoldResult<bool> {
+        use std::cmp::Ordering;
+        use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+
+        assert!(start <= end);
+        Ok(CASE_FOLDING_SIMPLE
+            .binary_search_by(|&(c, _)| {
+                if start <= c && c <= end {
+                    Ordering::Equal
+                } else if c > end {
+                    Ordering::Greater
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok())
+    }
+
+    imp(start, end)
+}
+
+/// A query for finding a character class defined by Unicode. This supports
+/// either use of a property name directly, or lookup by property value. The
+/// former generally refers to Binary properties (see UTS#44, Table 8), but
+/// as a special exception (see UTS#18, Section 1.2) both general categories
+/// (an enumeration) and scripts (a catalog) are supported as if each of their
+/// possible values were a binary property.
+///
+/// In all circumstances, property names and values are normalized and
+/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
+///
+/// The lifetime `'a` refers to the shorter of the lifetimes of property name
+/// and property value.
+#[derive(Debug)]
+pub enum ClassQuery<'a> {
+    /// Return a class corresponding to a Unicode binary property, named by
+    /// a single letter.
+    OneLetter(char),
+    /// Return a class corresponding to a Unicode binary property.
+    ///
+    /// Note that, by special exception (see UTS#18, Section 1.2), both
+    /// general category values and script values are permitted here as if
+    /// they were a binary property.
+    Binary(&'a str),
+    /// Return a class corresponding to all codepoints whose property
+    /// (identified by `property_name`) corresponds to the given value
+    /// (identified by `property_value`).
+    ByValue {
+        /// A property name.
+        property_name: &'a str,
+        /// A property value.
+        property_value: &'a str,
+    },
+}
+
+impl<'a> ClassQuery<'a> {
+    fn canonicalize(&self) -> Result<CanonicalClassQuery> {
+        match *self {
+            ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
+            ClassQuery::Binary(name) => self.canonical_binary(name),
+            ClassQuery::ByValue { property_name, property_value } => {
+                let property_name = symbolic_name_normalize(property_name);
+                let property_value = symbolic_name_normalize(property_value);
+
+                let canon_name = match canonical_prop(&property_name)? {
+                    None => return Err(Error::PropertyNotFound),
+                    Some(canon_name) => canon_name,
+                };
+                Ok(match canon_name {
+                    "General_Category" => {
+                        let canon = match canonical_gencat(&property_value)? {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::GeneralCategory(canon)
+                    }
+                    "Script" => {
+                        let canon = match canonical_script(&property_value)? {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::Script(canon)
+                    }
+                    _ => {
+                        let vals = match property_values(canon_name)? {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(vals) => vals,
+                        };
+                        let canon_val =
+                            match canonical_value(vals, &property_value) {
+                                None => {
+                                    return Err(Error::PropertyValueNotFound)
+                                }
+                                Some(canon_val) => canon_val,
+                            };
+                        CanonicalClassQuery::ByValue {
+                            property_name: canon_name,
+                            property_value: canon_val,
+                        }
+                    }
+                })
+            }
+        }
+    }
+
+    fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
+        let norm = symbolic_name_normalize(name);
+
+        if let Some(canon) = canonical_prop(&norm)? {
+            return Ok(CanonicalClassQuery::Binary(canon));
+        }
+        if let Some(canon) = canonical_gencat(&norm)? {
+            return Ok(CanonicalClassQuery::GeneralCategory(canon));
+        }
+        if let Some(canon) = canonical_script(&norm)? {
+            return Ok(CanonicalClassQuery::Script(canon));
+        }
+        Err(Error::PropertyNotFound)
+    }
+}
+
+/// Like ClassQuery, but its parameters have been canonicalized. This also
+/// differentiates binary properties from flattened general categories and
+/// scripts.
+#[derive(Debug, Eq, PartialEq)]
+enum CanonicalClassQuery {
+    /// The canonical binary property name.
+    Binary(&'static str),
+    /// The canonical general category name.
+    GeneralCategory(&'static str),
+    /// The canonical script name.
+    Script(&'static str),
+    /// An arbitrary association between property and value, both of which
+    /// have been canonicalized.
+    ///
+    /// Note that by construction, the property name of ByValue will never
+    /// be General_Category or Script. Those two cases are subsumed by the
+    /// eponymous variants.
+    ByValue {
+        /// The canonical property name.
+        property_name: &'static str,
+        /// The canonical property value.
+        property_value: &'static str,
+    },
+}
+
+/// Looks up a Unicode class given a query. If one doesn't exist, then
+/// `None` is returned.
+pub fn class(query: ClassQuery) -> Result<hir::ClassUnicode> {
+    use self::CanonicalClassQuery::*;
+
+    match query.canonicalize()? {
+        Binary(name) => bool_property(name),
+        GeneralCategory(name) => gencat(name),
+        Script(name) => script(name),
+        ByValue { property_name: "Age", property_value } => {
+            let mut class = hir::ClassUnicode::empty();
+            for set in ages(property_value)? {
+                class.union(&hir_class(set));
+            }
+            Ok(class)
+        }
+        ByValue { property_name: "Script_Extensions", property_value } => {
+            script_extension(property_value)
+        }
+        ByValue {
+            property_name: "Grapheme_Cluster_Break",
+            property_value,
+        } => gcb(property_value),
+        ByValue { property_name: "Sentence_Break", property_value } => {
+            sb(property_value)
+        }
+        ByValue { property_name: "Word_Break", property_value } => {
+            wb(property_value)
+        }
+        _ => {
+            // What else should we support?
+            Err(Error::PropertyNotFound)
+        }
+    }
+}
+
+/// Returns a Unicode aware class for \w.
+///
+/// This returns an error if the data is not available for \w.
+pub fn perl_word() -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-perl"))]
+    fn imp() -> Result<hir::ClassUnicode> {
+        Err(Error::PerlClassNotFound)
+    }
+
+    #[cfg(feature = "unicode-perl")]
+    fn imp() -> Result<hir::ClassUnicode> {
+        use unicode_tables::perl_word::PERL_WORD;
+        Ok(hir_class(PERL_WORD))
+    }
+
+    imp()
+}
+
+/// Returns a Unicode aware class for \s.
+///
+/// This returns an error if the data is not available for \s.
+pub fn perl_space() -> Result<hir::ClassUnicode> {
+    #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))]
+    fn imp() -> Result<hir::ClassUnicode> {
+        Err(Error::PerlClassNotFound)
+    }
+
+    #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
+    fn imp() -> Result<hir::ClassUnicode> {
+        use unicode_tables::perl_space::WHITE_SPACE;
+        Ok(hir_class(WHITE_SPACE))
+    }
+
+    #[cfg(feature = "unicode-bool")]
+    fn imp() -> Result<hir::ClassUnicode> {
+        use unicode_tables::property_bool::WHITE_SPACE;
+        Ok(hir_class(WHITE_SPACE))
+    }
+
+    imp()
+}
+
+/// Returns a Unicode aware class for \d.
+///
+/// This returns an error if the data is not available for \d.
+pub fn perl_digit() -> Result<hir::ClassUnicode> {
+    #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))]
+    fn imp() -> Result<hir::ClassUnicode> {
+        Err(Error::PerlClassNotFound)
+    }
+
+    #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
+    fn imp() -> Result<hir::ClassUnicode> {
+        use unicode_tables::perl_decimal::DECIMAL_NUMBER;
+        Ok(hir_class(DECIMAL_NUMBER))
+    }
+
+    #[cfg(feature = "unicode-gencat")]
+    fn imp() -> Result<hir::ClassUnicode> {
+        use unicode_tables::general_category::DECIMAL_NUMBER;
+        Ok(hir_class(DECIMAL_NUMBER))
+    }
+
+    imp()
+}
+
+/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges.
+pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
+    let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
+        .iter()
+        .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+        .collect();
+    hir::ClassUnicode::new(hir_ranges)
+}
+
+/// Returns true only if the given codepoint is in the `\w` character class.
+///
+/// If the `unicode-perl` feature is not enabled, then this returns an error.
+pub fn is_word_character(c: char) -> result::Result<bool, UnicodeWordError> {
+    #[cfg(not(feature = "unicode-perl"))]
+    fn imp(_: char) -> result::Result<bool, UnicodeWordError> {
+        Err(UnicodeWordError(()))
+    }
+
+    #[cfg(feature = "unicode-perl")]
+    fn imp(c: char) -> result::Result<bool, UnicodeWordError> {
+        use is_word_byte;
+        use std::cmp::Ordering;
+        use unicode_tables::perl_word::PERL_WORD;
+
+        if c <= 0x7F as char && is_word_byte(c as u8) {
+            return Ok(true);
+        }
+        Ok(PERL_WORD
+            .binary_search_by(|&(start, end)| {
+                if start <= c && c <= end {
+                    Ordering::Equal
+                } else if start > c {
+                    Ordering::Greater
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok())
+    }
+
+    imp(c)
+}
+
+/// A mapping of property values for a specific property.
+///
+/// The first element of each tuple is a normalized property value while the
+/// second element of each tuple is the corresponding canonical property
+/// value.
+type PropertyValues = &'static [(&'static str, &'static str)];
+
+fn canonical_gencat(normalized_value: &str) -> Result<Option<&'static str>> {
+    Ok(match normalized_value {
+        "any" => Some("Any"),
+        "assigned" => Some("Assigned"),
+        "ascii" => Some("ASCII"),
+        _ => {
+            let gencats = property_values("General_Category")?.unwrap();
+            canonical_value(gencats, normalized_value)
+        }
+    })
+}
+
+fn canonical_script(normalized_value: &str) -> Result<Option<&'static str>> {
+    let scripts = property_values("Script")?.unwrap();
+    Ok(canonical_value(scripts, normalized_value))
+}
+
+/// Find the canonical property name for the given normalized property name.
+///
+/// If no such property exists, then `None` is returned.
+///
+/// The normalized property name must have been normalized according to
+/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
+///
+/// If the property names data is not available, then an error is returned.
+fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
+    #[cfg(not(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    )))]
+    fn imp(_: &str) -> Result<Option<&'static str>> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    ))]
+    fn imp(name: &str) -> Result<Option<&'static str>> {
+        use unicode_tables::property_names::PROPERTY_NAMES;
+
+        Ok(PROPERTY_NAMES
+            .binary_search_by_key(&name, |&(n, _)| n)
+            .ok()
+            .map(|i| PROPERTY_NAMES[i].1))
+    }
+
+    imp(normalized_name)
+}
+
+/// Find the canonical property value for the given normalized property
+/// value.
+///
+/// The given property values should correspond to the values for the property
+/// under question, which can be found using `property_values`.
+///
+/// If no such property value exists, then `None` is returned.
+///
+/// The normalized property value must have been normalized according to
+/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
+fn canonical_value(
+    vals: PropertyValues,
+    normalized_value: &str,
+) -> Option<&'static str> {
+    vals.binary_search_by_key(&normalized_value, |&(n, _)| n)
+        .ok()
+        .map(|i| vals[i].1)
+}
+
+/// Return the table of property values for the given property name.
+///
+/// If the property values data is not available, then an error is returned.
+fn property_values(
+    canonical_property_name: &'static str,
+) -> Result<Option<PropertyValues>> {
+    #[cfg(not(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    )))]
+    fn imp(_: &'static str) -> Result<Option<PropertyValues>> {
+        Err(Error::PropertyValueNotFound)
+    }
+
+    #[cfg(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    ))]
+    fn imp(name: &'static str) -> Result<Option<PropertyValues>> {
+        use unicode_tables::property_values::PROPERTY_VALUES;
+
+        Ok(PROPERTY_VALUES
+            .binary_search_by_key(&name, |&(n, _)| n)
+            .ok()
+            .map(|i| PROPERTY_VALUES[i].1))
+    }
+
+    imp(canonical_property_name)
+}
+
+// This is only used in some cases, but small enough to just let it be dead
+// instead of figuring out (and maintaining) the right set of features.
+#[allow(dead_code)]
+fn property_set(
+    name_map: &'static [(&'static str, Range)],
+    canonical: &'static str,
+) -> Option<Range> {
+    name_map
+        .binary_search_by_key(&canonical, |x| x.0)
+        .ok()
+        .map(|i| name_map[i].1)
+}
+
+/// Returns an iterator over Unicode Age sets. Each item corresponds to a set
+/// of codepoints that were added in a particular revision of Unicode. The
+/// iterator yields items in chronological order.
+///
+/// If the given age value isn't valid or if the data isn't available, then an
+/// error is returned instead.
+fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
+    #[cfg(not(feature = "unicode-age"))]
+    fn imp(_: &str) -> Result<impl Iterator<Item = Range>> {
+        use std::option::IntoIter;
+        Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-age")]
+    fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
+        use unicode_tables::age;
+
+        const AGES: &'static [(&'static str, Range)] = &[
+            ("V1_1", age::V1_1),
+            ("V2_0", age::V2_0),
+            ("V2_1", age::V2_1),
+            ("V3_0", age::V3_0),
+            ("V3_1", age::V3_1),
+            ("V3_2", age::V3_2),
+            ("V4_0", age::V4_0),
+            ("V4_1", age::V4_1),
+            ("V5_0", age::V5_0),
+            ("V5_1", age::V5_1),
+            ("V5_2", age::V5_2),
+            ("V6_0", age::V6_0),
+            ("V6_1", age::V6_1),
+            ("V6_2", age::V6_2),
+            ("V6_3", age::V6_3),
+            ("V7_0", age::V7_0),
+            ("V8_0", age::V8_0),
+            ("V9_0", age::V9_0),
+            ("V10_0", age::V10_0),
+            ("V11_0", age::V11_0),
+            ("V12_0", age::V12_0),
+            ("V12_1", age::V12_1),
+            ("V13_0", age::V13_0),
+        ];
+        assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
+
+        let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
+        match pos {
+            None => Err(Error::PropertyValueNotFound),
+            Some(i) => Ok(AGES[..i + 1].iter().map(|&(_, classes)| classes)),
+        }
+    }
+
+    imp(canonical_age)
+}
+
+/// Returns the Unicode HIR class corresponding to the given general category.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given general category could not be found, or if the general
+/// category data is not available, then an error is returned.
+fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-gencat"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-gencat")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::general_category::BY_NAME;
+        match name {
+            "ASCII" => Ok(hir_class(&[('\0', '\x7F')])),
+            "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])),
+            "Assigned" => {
+                let mut cls = gencat("Unassigned")?;
+                cls.negate();
+                Ok(cls)
+            }
+            name => property_set(BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound),
+        }
+    }
+
+    match canonical_name {
+        "Decimal_Number" => perl_digit(),
+        name => imp(name),
+    }
+}
+
+/// Returns the Unicode HIR class corresponding to the given script.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given script could not be found, or if the script data is not
+/// available, then an error is returned.
+fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-script"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-script")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::script::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyValueNotFound)
+    }
+
+    imp(canonical_name)
+}
+
+/// Returns the Unicode HIR class corresponding to the given script extension.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given script extension could not be found, or if the script data is
+/// not available, then an error is returned.
+fn script_extension(
+    canonical_name: &'static str,
+) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-script"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-script")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::script_extension::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyValueNotFound)
+    }
+
+    imp(canonical_name)
+}
+
+/// Returns the Unicode HIR class corresponding to the given Unicode boolean
+/// property.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given boolean property could not be found, or if the boolean
+/// property data is not available, then an error is returned.
+fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-bool"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-bool")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::property_bool::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyNotFound)
+    }
+
+    match canonical_name {
+        "Decimal_Number" => perl_digit(),
+        "White_Space" => perl_space(),
+        name => imp(name),
+    }
+}
+
+/// Returns the Unicode HIR class corresponding to the given grapheme cluster
+/// break property.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given property could not be found, or if the corresponding data is
+/// not available, then an error is returned.
+fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-segment"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-segment")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::grapheme_cluster_break::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyValueNotFound)
+    }
+
+    imp(canonical_name)
+}
+
+/// Returns the Unicode HIR class corresponding to the given word break
+/// property.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given property could not be found, or if the corresponding data is
+/// not available, then an error is returned.
+fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-segment"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-segment")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::word_break::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyValueNotFound)
+    }
+
+    imp(canonical_name)
+}
+
+/// Returns the Unicode HIR class corresponding to the given sentence
+/// break property.
+///
+/// Name canonicalization is assumed to be performed by the caller.
+///
+/// If the given property could not be found, or if the corresponding data is
+/// not available, then an error is returned.
+fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(not(feature = "unicode-segment"))]
+    fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    #[cfg(feature = "unicode-segment")]
+    fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
+        use unicode_tables::sentence_break::BY_NAME;
+        property_set(BY_NAME, name)
+            .map(hir_class)
+            .ok_or(Error::PropertyValueNotFound)
+    }
+
+    imp(canonical_name)
+}
+
+/// Like symbolic_name_normalize_bytes, but operates on a string.
+fn symbolic_name_normalize(x: &str) -> String {
+    let mut tmp = x.as_bytes().to_vec();
+    let len = symbolic_name_normalize_bytes(&mut tmp).len();
+    tmp.truncate(len);
+    // This should always succeed because `symbolic_name_normalize_bytes`
+    // guarantees that `&tmp[..len]` is always valid UTF-8.
+    //
+    // N.B. We could avoid the additional UTF-8 check here, but it's unlikely
+    // to be worth skipping the additional safety check. A benchmark must
+    // justify it first.
+    String::from_utf8(tmp).unwrap()
+}
+
+/// Normalize the given symbolic name in place according to UAX44-LM3.
+///
+/// A "symbolic name" typically corresponds to property names and property
+/// value aliases. Note, though, that it should not be applied to property
+/// string values.
+///
+/// The slice returned is guaranteed to be valid UTF-8 for all possible values
+/// of `slice`.
+///
+/// See: http://unicode.org/reports/tr44/#UAX44-LM3
+fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
+    // I couldn't find a place in the standard that specified that property
+    // names/aliases had a particular structure (unlike character names), but
+    // we assume that it's ASCII only and drop anything that isn't ASCII.
+    let mut start = 0;
+    let mut starts_with_is = false;
+    if slice.len() >= 2 {
+        // Ignore any "is" prefix.
+        starts_with_is = slice[0..2] == b"is"[..]
+            || slice[0..2] == b"IS"[..]
+            || slice[0..2] == b"iS"[..]
+            || slice[0..2] == b"Is"[..];
+        if starts_with_is {
+            start = 2;
+        }
+    }
+    let mut next_write = 0;
+    for i in start..slice.len() {
+        // VALIDITY ARGUMENT: To guarantee that the resulting slice is valid
+        // UTF-8, we ensure that the slice contains only ASCII bytes. In
+        // particular, we drop every non-ASCII byte from the normalized string.
+        let b = slice[i];
+        if b == b' ' || b == b'_' || b == b'-' {
+            continue;
+        } else if b'A' <= b && b <= b'Z' {
+            slice[next_write] = b + (b'a' - b'A');
+            next_write += 1;
+        } else if b <= 0x7F {
+            slice[next_write] = b;
+            next_write += 1;
+        }
+    }
+    // Special case: ISO_Comment has a 'isc' abbreviation. Since we generally
+    // ignore 'is' prefixes, the 'isc' abbreviation gets caught in the cross
+    // fire and ends up creating an alias for 'c' to 'ISO_Comment', but it
+    // is actually an alias for the 'Other' general category.
+    if starts_with_is && next_write == 1 && slice[0] == b'c' {
+        slice[0] = b'i';
+        slice[1] = b's';
+        slice[2] = b'c';
+        next_write = 3;
+    }
+    &mut slice[..next_write]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        contains_simple_case_mapping, simple_fold, symbolic_name_normalize,
+        symbolic_name_normalize_bytes,
+    };
+
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
+        simple_fold(c).unwrap().unwrap()
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_err(c: char) -> Option<char> {
+        match simple_fold(c).unwrap() {
+            Ok(_) => unreachable!("simple_fold returned Ok iterator"),
+            Err(next) => next,
+        }
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn contains_case_map(start: char, end: char) -> bool {
+        contains_simple_case_mapping(start, end).unwrap()
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_k() {
+        let xs: Vec<char> = simple_fold_ok('k').collect();
+        assert_eq!(xs, vec!['K', 'K']);
+
+        let xs: Vec<char> = simple_fold_ok('K').collect();
+        assert_eq!(xs, vec!['k', 'K']);
+
+        let xs: Vec<char> = simple_fold_ok('K').collect();
+        assert_eq!(xs, vec!['K', 'k']);
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_a() {
+        let xs: Vec<char> = simple_fold_ok('a').collect();
+        assert_eq!(xs, vec!['A']);
+
+        let xs: Vec<char> = simple_fold_ok('A').collect();
+        assert_eq!(xs, vec!['a']);
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_empty() {
+        assert_eq!(Some('A'), simple_fold_err('?'));
+        assert_eq!(Some('A'), simple_fold_err('@'));
+        assert_eq!(Some('a'), simple_fold_err('['));
+        assert_eq!(Some('Ⰰ'), simple_fold_err('☃'));
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn simple_fold_max() {
+        assert_eq!(None, simple_fold_err('\u{10FFFE}'));
+        assert_eq!(None, simple_fold_err('\u{10FFFF}'));
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-case"))]
+    fn simple_fold_disabled() {
+        assert!(simple_fold('a').is_err());
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn range_contains() {
+        assert!(contains_case_map('A', 'A'));
+        assert!(contains_case_map('Z', 'Z'));
+        assert!(contains_case_map('A', 'Z'));
+        assert!(contains_case_map('@', 'A'));
+        assert!(contains_case_map('Z', '['));
+        assert!(contains_case_map('☃', 'Ⰰ'));
+
+        assert!(!contains_case_map('[', '['));
+        assert!(!contains_case_map('[', '`'));
+
+        assert!(!contains_case_map('☃', '☃'));
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-case"))]
+    fn range_contains_disabled() {
+        assert!(contains_simple_case_mapping('a', 'a').is_err());
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-gencat")]
+    fn regression_466() {
+        use super::{CanonicalClassQuery, ClassQuery};
+
+        let q = ClassQuery::OneLetter('C');
+        assert_eq!(
+            q.canonicalize().unwrap(),
+            CanonicalClassQuery::GeneralCategory("Other")
+        );
+    }
+
+    #[test]
+    fn sym_normalize() {
+        let sym_norm = symbolic_name_normalize;
+
+        assert_eq!(sym_norm("Line_Break"), "linebreak");
+        assert_eq!(sym_norm("Line-break"), "linebreak");
+        assert_eq!(sym_norm("linebreak"), "linebreak");
+        assert_eq!(sym_norm("BA"), "ba");
+        assert_eq!(sym_norm("ba"), "ba");
+        assert_eq!(sym_norm("Greek"), "greek");
+        assert_eq!(sym_norm("isGreek"), "greek");
+        assert_eq!(sym_norm("IS_Greek"), "greek");
+        assert_eq!(sym_norm("isc"), "isc");
+        assert_eq!(sym_norm("is c"), "isc");
+        assert_eq!(sym_norm("is_c"), "isc");
+    }
+
+    #[test]
+    fn valid_utf8_symbolic() {
+        let mut x = b"abc\xFFxyz".to_vec();
+        let y = symbolic_name_normalize_bytes(&mut x);
+        assert_eq!(y, b"abcxyz");
+    }
+}
diff --git a/src/unicode_tables/LICENSE-UNICODE b/src/unicode_tables/LICENSE-UNICODE
new file mode 100644
index 0000000..b82826b
--- /dev/null
+++ b/src/unicode_tables/LICENSE-UNICODE
@@ -0,0 +1,57 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+
+Unicode Data Files include all data files under the directories
+http://www.unicode.org/Public/, http://www.unicode.org/reports/,
+http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
+http://www.unicode.org/utility/trac/browser/.
+
+Unicode Data Files do not include PDF online code charts under the
+directory http://www.unicode.org/Public/.
+
+Software includes any source code published in the Unicode Standard
+or under the directories
+http://www.unicode.org/Public/, http://www.unicode.org/reports/,
+http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
+http://www.unicode.org/utility/trac/browser/.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2018 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/src/unicode_tables/age.rs b/src/unicode_tables/age.rs
new file mode 100644
index 0000000..1452a52
--- /dev/null
+++ b/src/unicode_tables/age.rs
@@ -0,0 +1,1671 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate age ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("V10_0", V10_0),
+    ("V11_0", V11_0),
+    ("V12_0", V12_0),
+    ("V12_1", V12_1),
+    ("V13_0", V13_0),
+    ("V1_1", V1_1),
+    ("V2_0", V2_0),
+    ("V2_1", V2_1),
+    ("V3_0", V3_0),
+    ("V3_1", V3_1),
+    ("V3_2", V3_2),
+    ("V4_0", V4_0),
+    ("V4_1", V4_1),
+    ("V5_0", V5_0),
+    ("V5_1", V5_1),
+    ("V5_2", V5_2),
+    ("V6_0", V6_0),
+    ("V6_1", V6_1),
+    ("V6_2", V6_2),
+    ("V6_3", V6_3),
+    ("V7_0", V7_0),
+    ("V8_0", V8_0),
+    ("V9_0", V9_0),
+];
+
+pub const V10_0: &'static [(char, char)] = &[
+    ('ࡠ', 'ࡪ'),
+    ('ৼ', '৽'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{d00}', '\u{d00}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('᳷', '᳷'),
+    ('\u{1df6}', '\u{1df9}'),
+    ('₿', '₿'),
+    ('⏿', '⏿'),
+    ('⯒', '⯒'),
+    ('⹅', '⹉'),
+    ('ㄮ', 'ㄮ'),
+    ('鿖', '鿪'),
+    ('𐌭', '𐌯'),
+    ('𑨀', '\u{11a47}'),
+    ('𑩐', '𑪃'),
+    ('𑪆', '𑪜'),
+    ('𑪞', '𑪢'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+    ('𖿡', '𖿡'),
+    ('𛀂', '𛄞'),
+    ('𛅰', '𛋻'),
+    ('🉠', '🉥'),
+    ('🛓', '🛔'),
+    ('🛷', '🛸'),
+    ('🤀', '🤋'),
+    ('🤟', '🤟'),
+    ('🤨', '🤯'),
+    ('🤱', '🤲'),
+    ('🥌', '🥌'),
+    ('🥟', '🥫'),
+    ('🦒', '🦗'),
+    ('🧐', '🧦'),
+    ('𬺰', '𮯠'),
+];
+
+pub const V11_0: &'static [(char, char)] = &[
+    ('ՠ', 'ՠ'),
+    ('ֈ', 'ֈ'),
+    ('ׯ', 'ׯ'),
+    ('\u{7fd}', '߿'),
+    ('\u{8d3}', '\u{8d3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('੶', '੶'),
+    ('\u{c04}', '\u{c04}'),
+    ('಄', '಄'),
+    ('ᡸ', 'ᡸ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('⮺', '⮼'),
+    ('⯓', '⯫'),
+    ('⯰', '⯾'),
+    ('⹊', '⹎'),
+    ('ㄯ', 'ㄯ'),
+    ('鿫', '鿯'),
+    ('ꞯ', 'ꞯ'),
+    ('Ꞹ', 'ꞹ'),
+    ('ꣾ', '\u{a8ff}'),
+    ('𐨴', '𐨵'),
+    ('𐩈', '𐩈'),
+    ('𐴀', '\u{10d27}'),
+    ('𐴰', '𐴹'),
+    ('𐼀', '𐼧'),
+    ('𐼰', '𐽙'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('𑅄', '𑅆'),
+    ('\u{1133b}', '\u{1133b}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('𑜚', '𑜚'),
+    ('𑠀', '𑠻'),
+    ('𑪝', '𑪝'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+    ('𑻠', '𑻸'),
+    ('𖹀', '𖺚'),
+    ('𘟭', '𘟱'),
+    ('𝋠', '𝋳'),
+    ('𝍲', '𝍸'),
+    ('𞱱', '𞲴'),
+    ('🄯', '🄯'),
+    ('🛹', '🛹'),
+    ('🟕', '🟘'),
+    ('🥍', '🥏'),
+    ('🥬', '🥰'),
+    ('🥳', '🥶'),
+    ('🥺', '🥺'),
+    ('🥼', '🥿'),
+    ('🦘', '🦢'),
+    ('🦰', '🦹'),
+    ('🧁', '🧂'),
+    ('🧧', '🧿'),
+    ('🩠', '🩭'),
+];
+
+pub const V12_0: &'static [(char, char)] = &[
+    ('౷', '౷'),
+    ('ຆ', 'ຆ'),
+    ('ຉ', 'ຉ'),
+    ('ຌ', 'ຌ'),
+    ('ຎ', 'ຓ'),
+    ('ຘ', 'ຘ'),
+    ('ຠ', 'ຠ'),
+    ('ຨ', 'ຩ'),
+    ('ຬ', 'ຬ'),
+    ('\u{eba}', '\u{eba}'),
+    ('ᳺ', 'ᳺ'),
+    ('⯉', '⯉'),
+    ('⯿', '⯿'),
+    ('⹏', '⹏'),
+    ('Ꞻ', 'ꞿ'),
+    ('Ꟃ', 'Ᶎ'),
+    ('ꭦ', 'ꭧ'),
+    ('𐿠', '𐿶'),
+    ('𑑟', '𑑟'),
+    ('𑚸', '𑚸'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧤'),
+    ('𑪄', '𑪅'),
+    ('𑿀', '𑿱'),
+    ('𑿿', '𑿿'),
+    ('\u{13430}', '\u{13438}'),
+    ('𖽅', '𖽊'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('𖽿', '𖾇'),
+    ('𖿢', '𖿣'),
+    ('𘟲', '𘟷'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𞄀', '𞄬'),
+    ('\u{1e130}', '𞄽'),
+    ('𞅀', '𞅉'),
+    ('𞅎', '𞅏'),
+    ('𞋀', '𞋹'),
+    ('𞋿', '𞋿'),
+    ('𞥋', '𞥋'),
+    ('𞴁', '𞴽'),
+    ('🅬', '🅬'),
+    ('🛕', '🛕'),
+    ('🛺', '🛺'),
+    ('🟠', '🟫'),
+    ('🤍', '🤏'),
+    ('🤿', '🤿'),
+    ('🥱', '🥱'),
+    ('🥻', '🥻'),
+    ('🦥', '🦪'),
+    ('🦮', '🦯'),
+    ('🦺', '🦿'),
+    ('🧃', '🧊'),
+    ('🧍', '🧏'),
+    ('🨀', '🩓'),
+    ('🩰', '🩳'),
+    ('🩸', '🩺'),
+    ('🪀', '🪂'),
+    ('🪐', '🪕'),
+];
+
+pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')];
+
+pub const V13_0: &'static [(char, char)] = &[
+    ('\u{8be}', '\u{8c7}'),
+    ('\u{b55}', '\u{b55}'),
+    ('\u{d04}', '\u{d04}'),
+    ('\u{d81}', '\u{d81}'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{2b97}', '\u{2b97}'),
+    ('\u{2e50}', '\u{2e52}'),
+    ('\u{31bb}', '\u{31bf}'),
+    ('\u{4db6}', '\u{4dbf}'),
+    ('\u{9ff0}', '\u{9ffc}'),
+    ('\u{a7c7}', '\u{a7ca}'),
+    ('\u{a7f5}', '\u{a7f6}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{ab68}', '\u{ab6b}'),
+    ('\u{1019c}', '\u{1019c}'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10ead}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('\u{10fb0}', '\u{10fcb}'),
+    ('\u{11147}', '\u{11147}'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('\u{1145a}', '\u{1145a}'),
+    ('\u{11460}', '\u{11461}'),
+    ('\u{11900}', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11946}'),
+    ('\u{11950}', '\u{11959}'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{18af3}', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('\u{1f10d}', '\u{1f10f}'),
+    ('\u{1f16d}', '\u{1f16f}'),
+    ('\u{1f1ad}', '\u{1f1ad}'),
+    ('\u{1f6d6}', '\u{1f6d7}'),
+    ('\u{1f6fb}', '\u{1f6fc}'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('\u{1f90c}', '\u{1f90c}'),
+    ('\u{1f972}', '\u{1f972}'),
+    ('\u{1f977}', '\u{1f978}'),
+    ('\u{1f9a3}', '\u{1f9a4}'),
+    ('\u{1f9ab}', '\u{1f9ad}'),
+    ('\u{1f9cb}', '\u{1f9cb}'),
+    ('\u{1fa74}', '\u{1fa74}'),
+    ('\u{1fa83}', '\u{1fa86}'),
+    ('\u{1fa96}', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('\u{2a6d7}', '\u{2a6dd}'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const V1_1: &'static [(char, char)] = &[
+    ('\u{0}', 'ǵ'),
+    ('Ǻ', 'ȗ'),
+    ('ɐ', 'ʨ'),
+    ('ʰ', '˞'),
+    ('ˠ', '˩'),
+    ('\u{300}', '\u{345}'),
+    ('\u{360}', '\u{361}'),
+    ('ʹ', '͵'),
+    ('ͺ', 'ͺ'),
+    (';', ';'),
+    ('΄', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ώ'),
+    ('ϐ', 'ϖ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'ϳ'),
+    ('Ё', 'Ќ'),
+    ('Ў', 'я'),
+    ('ё', 'ќ'),
+    ('ў', '\u{486}'),
+    ('Ґ', 'ӄ'),
+    ('Ӈ', 'ӈ'),
+    ('Ӌ', 'ӌ'),
+    ('Ӑ', 'ӫ'),
+    ('Ӯ', 'ӵ'),
+    ('Ӹ', 'ӹ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', '՟'),
+    ('ա', 'և'),
+    ('։', '։'),
+    ('\u{5b0}', '\u{5b9}'),
+    ('\u{5bb}', '׃'),
+    ('א', 'ת'),
+    ('װ', '״'),
+    ('،', '،'),
+    ('؛', '؛'),
+    ('؟', '؟'),
+    ('ء', 'غ'),
+    ('ـ', '\u{652}'),
+    ('٠', '٭'),
+    ('\u{670}', 'ڷ'),
+    ('ں', 'ھ'),
+    ('ۀ', 'ێ'),
+    ('ې', '\u{6ed}'),
+    ('۰', '۹'),
+    ('\u{901}', 'ः'),
+    ('अ', 'ह'),
+    ('\u{93c}', '\u{94d}'),
+    ('ॐ', '\u{954}'),
+    ('क़', '॰'),
+    ('\u{981}', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', '৺'),
+    ('\u{a02}', '\u{a02}'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', 'ੴ'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઋ'),
+    ('ઍ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૠ'),
+    ('૦', '૯'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଶ', 'ହ'),
+    ('\u{b3c}', '\u{b43}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b56}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('୦', '୰'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'வ'),
+    ('ஷ', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௧', '௲'),
+    ('ఁ', 'ః'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'ళ'),
+    ('వ', 'హ'),
+    ('\u{c3e}', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౠ', 'ౡ'),
+    ('౦', '౯'),
+    ('ಂ', 'ಃ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಾ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('೦', '೯'),
+    ('ം', 'ഃ'),
+    ('അ', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ന'),
+    ('പ', 'ഹ'),
+    ('\u{d3e}', '\u{d43}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('ൠ', 'ൡ'),
+    ('൦', '൯'),
+    ('ก', '\u{e3a}'),
+    ('฿', '๛'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ງ', 'ຈ'),
+    ('ຊ', 'ຊ'),
+    ('ຍ', 'ຍ'),
+    ('ດ', 'ທ'),
+    ('ນ', 'ຟ'),
+    ('ມ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ວ'),
+    ('ສ', 'ຫ'),
+    ('ອ', '\u{eb9}'),
+    ('\u{ebb}', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໝ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('ა', 'ჶ'),
+    ('჻', '჻'),
+    ('ᄀ', 'ᅙ'),
+    ('ᅟ', 'ᆢ'),
+    ('ᆨ', 'ᇹ'),
+    ('Ḁ', 'ẚ'),
+    ('Ạ', 'ỹ'),
+    ('ἀ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ῄ'),
+    ('ῆ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('῝', '`'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', '῾'),
+    ('\u{2000}', '\u{202e}'),
+    ('‰', '⁆'),
+    ('\u{206a}', '⁰'),
+    ('⁴', '₎'),
+    ('₠', '₪'),
+    ('\u{20d0}', '\u{20e1}'),
+    ('℀', 'ℸ'),
+    ('⅓', 'ↂ'),
+    ('←', '⇪'),
+    ('∀', '⋱'),
+    ('⌀', '⌀'),
+    ('⌂', '⍺'),
+    ('␀', '␤'),
+    ('⑀', '⑊'),
+    ('①', '⓪'),
+    ('─', '▕'),
+    ('■', '◯'),
+    ('☀', '☓'),
+    ('☚', '♯'),
+    ('✁', '✄'),
+    ('✆', '✉'),
+    ('✌', '✧'),
+    ('✩', '❋'),
+    ('❍', '❍'),
+    ('❏', '❒'),
+    ('❖', '❖'),
+    ('❘', '❞'),
+    ('❡', '❧'),
+    ('❶', '➔'),
+    ('➘', '➯'),
+    ('➱', '➾'),
+    ('\u{3000}', '〷'),
+    ('〿', '〿'),
+    ('ぁ', 'ゔ'),
+    ('\u{3099}', 'ゞ'),
+    ('ァ', 'ヾ'),
+    ('ㄅ', 'ㄬ'),
+    ('ㄱ', 'ㆎ'),
+    ('㆐', '㆟'),
+    ('㈀', '㈜'),
+    ('㈠', '㉃'),
+    ('㉠', '㉻'),
+    ('㉿', '㊰'),
+    ('㋀', '㋋'),
+    ('㋐', '㋾'),
+    ('㌀', '㍶'),
+    ('㍻', '㏝'),
+    ('㏠', '㏾'),
+    ('一', '龥'),
+    ('\u{e000}', '鶴'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('\u{fb1e}', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', '﴿'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('\u{fe20}', '\u{fe23}'),
+    ('︰', '﹄'),
+    ('﹉', '﹒'),
+    ('﹔', '﹦'),
+    ('﹨', '﹫'),
+    ('ﹰ', 'ﹲ'),
+    ('ﹴ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{ff01}', '\u{ff5e}'),
+    ('\u{ff61}', '\u{ffbe}'),
+    ('\u{ffc2}', '\u{ffc7}'),
+    ('\u{ffca}', '\u{ffcf}'),
+    ('\u{ffd2}', '\u{ffd7}'),
+    ('\u{ffda}', '\u{ffdc}'),
+    ('\u{ffe0}', '\u{ffe6}'),
+    ('\u{ffe8}', '\u{ffee}'),
+    ('�', '\u{ffff}'),
+];
+
+pub const V2_0: &'static [(char, char)] = &[
+    ('\u{591}', '\u{5a1}'),
+    ('\u{5a3}', '\u{5af}'),
+    ('\u{5c4}', '\u{5c4}'),
+    ('ༀ', 'ཇ'),
+    ('ཉ', 'ཀྵ'),
+    ('\u{f71}', 'ྋ'),
+    ('\u{f90}', '\u{f95}'),
+    ('\u{f97}', '\u{f97}'),
+    ('\u{f99}', '\u{fad}'),
+    ('\u{fb1}', '\u{fb7}'),
+    ('\u{fb9}', '\u{fb9}'),
+    ('ẛ', 'ẛ'),
+    ('₫', '₫'),
+    ('가', '힣'),
+    ('\u{1fffe}', '\u{1ffff}'),
+    ('\u{2fffe}', '\u{2ffff}'),
+    ('\u{3fffe}', '\u{3ffff}'),
+    ('\u{4fffe}', '\u{4ffff}'),
+    ('\u{5fffe}', '\u{5ffff}'),
+    ('\u{6fffe}', '\u{6ffff}'),
+    ('\u{7fffe}', '\u{7ffff}'),
+    ('\u{8fffe}', '\u{8ffff}'),
+    ('\u{9fffe}', '\u{9ffff}'),
+    ('\u{afffe}', '\u{affff}'),
+    ('\u{bfffe}', '\u{bffff}'),
+    ('\u{cfffe}', '\u{cffff}'),
+    ('\u{dfffe}', '\u{dffff}'),
+    ('\u{efffe}', '\u{10ffff}'),
+];
+
+pub const V2_1: &'static [(char, char)] = &[('€', '€'), ('\u{fffc}', '\u{fffc}')];
+
+pub const V3_0: &'static [(char, char)] = &[
+    ('Ƕ', 'ǹ'),
+    ('Ș', 'ȟ'),
+    ('Ȣ', 'ȳ'),
+    ('ʩ', 'ʭ'),
+    ('˟', '˟'),
+    ('˪', 'ˮ'),
+    ('\u{346}', '\u{34e}'),
+    ('\u{362}', '\u{362}'),
+    ('ϗ', 'ϗ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('Ѐ', 'Ѐ'),
+    ('Ѝ', 'Ѝ'),
+    ('ѐ', 'ѐ'),
+    ('ѝ', 'ѝ'),
+    ('\u{488}', '\u{489}'),
+    ('Ҍ', 'ҏ'),
+    ('Ӭ', 'ӭ'),
+    ('֊', '֊'),
+    ('\u{653}', '\u{655}'),
+    ('ڸ', 'ڹ'),
+    ('ڿ', 'ڿ'),
+    ('ۏ', 'ۏ'),
+    ('ۺ', '۾'),
+    ('܀', '܍'),
+    ('\u{70f}', 'ܬ'),
+    ('\u{730}', '\u{74a}'),
+    ('ހ', '\u{7b0}'),
+    ('ං', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', '෴'),
+    ('ཪ', 'ཪ'),
+    ('\u{f96}', '\u{f96}'),
+    ('\u{fae}', '\u{fb0}'),
+    ('\u{fb8}', '\u{fb8}'),
+    ('\u{fba}', '\u{fbc}'),
+    ('྾', '࿌'),
+    ('࿏', '࿏'),
+    ('က', 'အ'),
+    ('ဣ', 'ဧ'),
+    ('ဩ', 'ဪ'),
+    ('ာ', '\u{1032}'),
+    ('\u{1036}', '\u{1039}'),
+    ('၀', '\u{1059}'),
+    ('ሀ', 'ሆ'),
+    ('ለ', 'ቆ'),
+    ('ቈ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኆ'),
+    ('ኈ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኮ'),
+    ('ኰ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዎ'),
+    ('ዐ', 'ዖ'),
+    ('ዘ', 'ዮ'),
+    ('ደ', 'ጎ'),
+    ('ጐ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ጞ'),
+    ('ጠ', 'ፆ'),
+    ('ፈ', 'ፚ'),
+    ('፡', '፼'),
+    ('Ꭰ', 'Ᏼ'),
+    ('ᐁ', 'ᙶ'),
+    ('\u{1680}', '᚜'),
+    ('ᚠ', 'ᛰ'),
+    ('ក', 'ៜ'),
+    ('០', '៩'),
+    ('᠀', '\u{180e}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡷ'),
+    ('ᢀ', '\u{18a9}'),
+    ('\u{202f}', '\u{202f}'),
+    ('⁈', '⁍'),
+    ('₭', '₯'),
+    ('\u{20e2}', '\u{20e3}'),
+    ('ℹ', '℺'),
+    ('Ↄ', 'Ↄ'),
+    ('⇫', '⇳'),
+    ('⌁', '⌁'),
+    ('⍻', '⍻'),
+    ('⍽', '⎚'),
+    ('␥', '␦'),
+    ('◰', '◷'),
+    ('☙', '☙'),
+    ('♰', '♱'),
+    ('⠀', '⣿'),
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('⿰', '⿻'),
+    ('〸', '〺'),
+    ('〾', '〾'),
+    ('ㆠ', 'ㆷ'),
+    ('㐀', '䶵'),
+    ('ꀀ', 'ꒌ'),
+    ('꒐', '꒡'),
+    ('꒤', '꒳'),
+    ('꒵', '꓀'),
+    ('꓂', '꓄'),
+    ('꓆', '꓆'),
+    ('יִ', 'יִ'),
+    ('\u{fff9}', '\u{fffb}'),
+];
+
+pub const V3_1: &'static [(char, char)] = &[
+    ('ϴ', 'ϵ'),
+    ('\u{fdd0}', '\u{fdef}'),
+    ('𐌀', '𐌞'),
+    ('𐌠', '𐌣'),
+    ('𐌰', '𐍊'),
+    ('𐐀', '𐐥'),
+    ('𐐨', '𐑍'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄪', '𝇝'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓀'),
+    ('𝓂', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚣'),
+    ('𝚨', '𝟉'),
+    ('𝟎', '𝟿'),
+    ('𠀀', '𪛖'),
+    ('丽', '𪘀'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const V3_2: &'static [(char, char)] = &[
+    ('Ƞ', 'Ƞ'),
+    ('\u{34f}', '\u{34f}'),
+    ('\u{363}', '\u{36f}'),
+    ('Ϙ', 'ϙ'),
+    ('϶', '϶'),
+    ('Ҋ', 'ҋ'),
+    ('Ӆ', 'ӆ'),
+    ('Ӊ', 'ӊ'),
+    ('Ӎ', 'ӎ'),
+    ('Ԁ', 'ԏ'),
+    ('ٮ', 'ٯ'),
+    ('ޱ', 'ޱ'),
+    ('ჷ', 'ჸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', '\u{1714}'),
+    ('ᜠ', '᜶'),
+    ('ᝀ', '\u{1753}'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('\u{1772}', '\u{1773}'),
+    ('⁇', '⁇'),
+    ('⁎', '⁒'),
+    ('⁗', '⁗'),
+    ('\u{205f}', '\u{2063}'),
+    ('ⁱ', 'ⁱ'),
+    ('₰', '₱'),
+    ('\u{20e4}', '\u{20ea}'),
+    ('ℽ', '⅋'),
+    ('⇴', '⇿'),
+    ('⋲', '⋿'),
+    ('⍼', '⍼'),
+    ('⎛', '⏎'),
+    ('⓫', '⓾'),
+    ('▖', '▟'),
+    ('◸', '◿'),
+    ('☖', '☗'),
+    ('♲', '♽'),
+    ('⚀', '⚉'),
+    ('❨', '❵'),
+    ('⟐', '⟫'),
+    ('⟰', '⟿'),
+    ('⤀', '⫿'),
+    ('〻', '〽'),
+    ('ゕ', 'ゖ'),
+    ('ゟ', '゠'),
+    ('ヿ', 'ヿ'),
+    ('ㇰ', 'ㇿ'),
+    ('㉑', '㉟'),
+    ('㊱', '㊿'),
+    ('꒢', '꒣'),
+    ('꒴', '꒴'),
+    ('꓁', '꓁'),
+    ('꓅', '꓅'),
+    ('侮', '頻'),
+    ('﷼', '﷼'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('﹅', '﹆'),
+    ('ﹳ', 'ﹳ'),
+    ('\u{ff5f}', '\u{ff60}'),
+];
+
+pub const V4_0: &'static [(char, char)] = &[
+    ('ȡ', 'ȡ'),
+    ('ȴ', 'ȶ'),
+    ('ʮ', 'ʯ'),
+    ('˯', '˿'),
+    ('\u{350}', '\u{357}'),
+    ('\u{35d}', '\u{35f}'),
+    ('Ϸ', 'ϻ'),
+    ('\u{600}', '\u{603}'),
+    ('؍', '\u{615}'),
+    ('\u{656}', '\u{658}'),
+    ('ۮ', 'ۯ'),
+    ('ۿ', 'ۿ'),
+    ('ܭ', 'ܯ'),
+    ('ݍ', 'ݏ'),
+    ('ऄ', 'ऄ'),
+    ('ঽ', 'ঽ'),
+    ('\u{a01}', '\u{a01}'),
+    ('ਃ', 'ਃ'),
+    ('ઌ', 'ઌ'),
+    ('ૡ', '\u{ae3}'),
+    ('૱', '૱'),
+    ('ଵ', 'ଵ'),
+    ('ୱ', 'ୱ'),
+    ('௳', '௺'),
+    ('\u{cbc}', 'ಽ'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('៰', '៹'),
+    ('ᤀ', 'ᤜ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥀', '᥀'),
+    ('᥄', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('᧠', '᧿'),
+    ('ᴀ', 'ᵫ'),
+    ('⁓', '⁔'),
+    ('℻', '℻'),
+    ('⏏', '⏐'),
+    ('⓿', '⓿'),
+    ('☔', '☕'),
+    ('⚊', '⚑'),
+    ('⚠', '⚡'),
+    ('⬀', '⬍'),
+    ('㈝', '㈞'),
+    ('㉐', '㉐'),
+    ('㉼', '㉽'),
+    ('㋌', '㋏'),
+    ('㍷', '㍺'),
+    ('㏞', '㏟'),
+    ('㏿', '㏿'),
+    ('䷀', '䷿'),
+    ('﷽', '﷽'),
+    ('﹇', '﹈'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐄀', '𐄂'),
+    ('𐄇', '𐄳'),
+    ('𐄷', '𐄿'),
+    ('𐎀', '𐎝'),
+    ('𐎟', '𐎟'),
+    ('𐐦', '𐐧'),
+    ('𐑎', '𐒝'),
+    ('𐒠', '𐒩'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐠿'),
+    ('𝌀', '𝍖'),
+    ('𝓁', '𝓁'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const V4_1: &'static [(char, char)] = &[
+    ('ȷ', 'Ɂ'),
+    ('\u{358}', '\u{35c}'),
+    ('ϼ', 'Ͽ'),
+    ('Ӷ', 'ӷ'),
+    ('\u{5a2}', '\u{5a2}'),
+    ('\u{5c5}', '\u{5c7}'),
+    ('؋', '؋'),
+    ('؞', '؞'),
+    ('\u{659}', '\u{65e}'),
+    ('ݐ', 'ݭ'),
+    ('ॽ', 'ॽ'),
+    ('ৎ', 'ৎ'),
+    ('ஶ', 'ஶ'),
+    ('௦', '௦'),
+    ('࿐', '࿑'),
+    ('ჹ', 'ჺ'),
+    ('ჼ', 'ჼ'),
+    ('ሇ', 'ሇ'),
+    ('ቇ', 'ቇ'),
+    ('ኇ', 'ኇ'),
+    ('ኯ', 'ኯ'),
+    ('ዏ', 'ዏ'),
+    ('ዯ', 'ዯ'),
+    ('ጏ', 'ጏ'),
+    ('ጟ', 'ጟ'),
+    ('ፇ', 'ፇ'),
+    ('\u{135f}', '፠'),
+    ('ᎀ', '᎙'),
+    ('ᦀ', 'ᦩ'),
+    ('ᦰ', 'ᧉ'),
+    ('᧐', '᧙'),
+    ('᧞', '᧟'),
+    ('ᨀ', '\u{1a1b}'),
+    ('᨞', '᨟'),
+    ('ᵬ', '\u{1dc3}'),
+    ('⁕', '⁖'),
+    ('⁘', '⁞'),
+    ('ₐ', 'ₔ'),
+    ('₲', '₵'),
+    ('\u{20eb}', '\u{20eb}'),
+    ('ℼ', 'ℼ'),
+    ('⅌', '⅌'),
+    ('⏑', '⏛'),
+    ('☘', '☘'),
+    ('♾', '♿'),
+    ('⚒', '⚜'),
+    ('⚢', '⚱'),
+    ('⟀', '⟆'),
+    ('⬎', '⬓'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⲁ', '⳪'),
+    ('⳹', 'ⴥ'),
+    ('ⴰ', 'ⵥ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('⸀', '⸗'),
+    ('⸜', '⸝'),
+    ('㇀', '㇏'),
+    ('㉾', '㉾'),
+    ('龦', '龻'),
+    ('꜀', '꜖'),
+    ('ꠀ', '꠫'),
+    ('並', '龎'),
+    ('︐', '︙'),
+    ('𐅀', '𐆊'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏕'),
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨳'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '𐩇'),
+    ('𐩐', '𐩘'),
+    ('𝈀', '𝉅'),
+    ('𝚤', '𝚥'),
+];
+
+pub const V5_0: &'static [(char, char)] = &[
+    ('ɂ', 'ɏ'),
+    ('ͻ', 'ͽ'),
+    ('ӏ', 'ӏ'),
+    ('Ӻ', 'ӿ'),
+    ('Ԑ', 'ԓ'),
+    ('\u{5ba}', '\u{5ba}'),
+    ('߀', 'ߺ'),
+    ('ॻ', 'ॼ'),
+    ('ॾ', 'ॿ'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('ೱ', 'ೲ'),
+    ('\u{1b00}', 'ᭋ'),
+    ('᭐', '᭼'),
+    ('\u{1dc4}', '\u{1dca}'),
+    ('\u{1dfe}', '\u{1dff}'),
+    ('\u{20ec}', '\u{20ef}'),
+    ('⅍', 'ⅎ'),
+    ('ↄ', 'ↄ'),
+    ('⏜', '⏧'),
+    ('⚲', '⚲'),
+    ('⟇', '⟊'),
+    ('⬔', '⬚'),
+    ('⬠', '⬣'),
+    ('Ⱡ', 'ⱬ'),
+    ('ⱴ', 'ⱷ'),
+    ('ꜗ', 'ꜚ'),
+    ('꜠', '꜡'),
+    ('ꡀ', '꡷'),
+    ('𐤀', '𐤙'),
+    ('𐤟', '𐤟'),
+    ('𒀀', '𒍮'),
+    ('𒐀', '𒑢'),
+    ('𒑰', '𒑳'),
+    ('𝍠', '𝍱'),
+    ('𝟊', '𝟋'),
+];
+
+pub const V5_1: &'static [(char, char)] = &[
+    ('Ͱ', 'ͳ'),
+    ('Ͷ', 'ͷ'),
+    ('Ϗ', 'Ϗ'),
+    ('\u{487}', '\u{487}'),
+    ('Ԕ', 'ԣ'),
+    ('؆', '؊'),
+    ('\u{616}', '\u{61a}'),
+    ('ػ', 'ؿ'),
+    ('ݮ', 'ݿ'),
+    ('ॱ', 'ॲ'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{b44}', '\u{b44}'),
+    ('\u{b62}', '\u{b63}'),
+    ('ௐ', 'ௐ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౙ'),
+    ('\u{c62}', '\u{c63}'),
+    ('౸', '౿'),
+    ('ഽ', 'ഽ'),
+    ('\u{d44}', '\u{d44}'),
+    ('\u{d62}', '\u{d63}'),
+    ('൰', '൵'),
+    ('൹', 'ൿ'),
+    ('ཫ', 'ཬ'),
+    ('࿎', '࿎'),
+    ('࿒', '࿔'),
+    ('ဢ', 'ဢ'),
+    ('ဨ', 'ဨ'),
+    ('ါ', 'ါ'),
+    ('\u{1033}', '\u{1035}'),
+    ('\u{103a}', 'ဿ'),
+    ('ၚ', '႙'),
+    ('႞', '႟'),
+    ('ᢪ', 'ᢪ'),
+    ('\u{1b80}', '᮪'),
+    ('ᮮ', '᮹'),
+    ('ᰀ', '\u{1c37}'),
+    ('᰻', '᱉'),
+    ('ᱍ', '᱿'),
+    ('\u{1dcb}', '\u{1de6}'),
+    ('ẜ', 'ẟ'),
+    ('Ỻ', 'ỿ'),
+    ('\u{2064}', '\u{2064}'),
+    ('\u{20f0}', '\u{20f0}'),
+    ('⅏', '⅏'),
+    ('ↅ', 'ↈ'),
+    ('⚝', '⚝'),
+    ('⚳', '⚼'),
+    ('⛀', '⛃'),
+    ('⟌', '⟌'),
+    ('⟬', '⟯'),
+    ('⬛', '⬟'),
+    ('⬤', '⭌'),
+    ('⭐', '⭔'),
+    ('Ɑ', 'Ɐ'),
+    ('ⱱ', 'ⱳ'),
+    ('ⱸ', 'ⱽ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('⸘', '⸛'),
+    ('⸞', '⸰'),
+    ('ㄭ', 'ㄭ'),
+    ('㇐', '㇣'),
+    ('龼', '鿃'),
+    ('ꔀ', 'ꘫ'),
+    ('Ꙁ', 'ꙟ'),
+    ('Ꙣ', '꙳'),
+    ('\u{a67c}', 'ꚗ'),
+    ('ꜛ', 'ꜟ'),
+    ('Ꜣ', 'ꞌ'),
+    ('ꟻ', 'ꟿ'),
+    ('ꢀ', '\u{a8c4}'),
+    ('꣎', '꣙'),
+    ('꤀', '꥓'),
+    ('꥟', '꥟'),
+    ('ꨀ', '\u{aa36}'),
+    ('ꩀ', 'ꩍ'),
+    ('꩐', '꩙'),
+    ('꩜', '꩟'),
+    ('\u{fe24}', '\u{fe26}'),
+    ('𐆐', '𐆛'),
+    ('𐇐', '\u{101fd}'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐤠', '𐤹'),
+    ('𐤿', '𐤿'),
+    ('𝄩', '𝄩'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+];
+
+pub const V5_2: &'static [(char, char)] = &[
+    ('Ԥ', 'ԥ'),
+    ('ࠀ', '\u{82d}'),
+    ('࠰', '࠾'),
+    ('\u{900}', '\u{900}'),
+    ('ॎ', 'ॎ'),
+    ('\u{955}', '\u{955}'),
+    ('ॹ', 'ॺ'),
+    ('৻', '৻'),
+    ('࿕', '࿘'),
+    ('ႚ', '\u{109d}'),
+    ('ᅚ', 'ᅞ'),
+    ('ᆣ', 'ᆧ'),
+    ('ᇺ', 'ᇿ'),
+    ('᐀', '᐀'),
+    ('ᙷ', 'ᙿ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᦪ', 'ᦫ'),
+    ('᧚', '᧚'),
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('᪠', '᪭'),
+    ('\u{1cd0}', 'ᳲ'),
+    ('\u{1dfd}', '\u{1dfd}'),
+    ('₶', '₸'),
+    ('⅐', '⅒'),
+    ('↉', '↉'),
+    ('⏨', '⏨'),
+    ('⚞', '⚟'),
+    ('⚽', '⚿'),
+    ('⛄', '⛍'),
+    ('⛏', '⛡'),
+    ('⛣', '⛣'),
+    ('⛨', '⛿'),
+    ('❗', '❗'),
+    ('⭕', '⭙'),
+    ('Ɒ', 'Ɒ'),
+    ('Ȿ', 'Ɀ'),
+    ('Ⳬ', '\u{2cf1}'),
+    ('⸱', '⸱'),
+    ('㉄', '㉏'),
+    ('鿄', '鿋'),
+    ('ꓐ', '꓿'),
+    ('ꚠ', '꛷'),
+    ('꠰', '꠹'),
+    ('\u{a8e0}', 'ꣻ'),
+    ('ꥠ', 'ꥼ'),
+    ('\u{a980}', '꧍'),
+    ('ꧏ', '꧙'),
+    ('꧞', '꧟'),
+    ('ꩠ', 'ꩻ'),
+    ('ꪀ', 'ꫂ'),
+    ('ꫛ', '꫟'),
+    ('ꯀ', '\u{abed}'),
+    ('꯰', '꯹'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('恵', '舘'),
+    ('𐡀', '𐡕'),
+    ('𐡗', '𐡟'),
+    ('𐤚', '𐤛'),
+    ('𐩠', '𐩿'),
+    ('𐬀', '𐬵'),
+    ('𐬹', '𐭕'),
+    ('𐭘', '𐭲'),
+    ('𐭸', '𐭿'),
+    ('𐰀', '𐱈'),
+    ('𐹠', '𐹾'),
+    ('\u{11080}', '𑃁'),
+    ('𓀀', '𓐮'),
+    ('🄀', '🄊'),
+    ('🄐', '🄮'),
+    ('🄱', '🄱'),
+    ('🄽', '🄽'),
+    ('🄿', '🄿'),
+    ('🅂', '🅂'),
+    ('🅆', '🅆'),
+    ('🅊', '🅎'),
+    ('🅗', '🅗'),
+    ('🅟', '🅟'),
+    ('🅹', '🅹'),
+    ('🅻', '🅼'),
+    ('🅿', '🅿'),
+    ('🆊', '🆍'),
+    ('🆐', '🆐'),
+    ('🈀', '🈀'),
+    ('🈐', '🈱'),
+    ('🉀', '🉈'),
+    ('𪜀', '𫜴'),
+];
+
+pub const V6_0: &'static [(char, char)] = &[
+    ('Ԧ', 'ԧ'),
+    ('ؠ', 'ؠ'),
+    ('\u{65f}', '\u{65f}'),
+    ('ࡀ', '\u{85b}'),
+    ('࡞', '࡞'),
+    ('\u{93a}', 'ऻ'),
+    ('ॏ', 'ॏ'),
+    ('\u{956}', '\u{957}'),
+    ('ॳ', 'ॷ'),
+    ('୲', '୷'),
+    ('ഩ', 'ഩ'),
+    ('ഺ', 'ഺ'),
+    ('ൎ', 'ൎ'),
+    ('ྌ', '\u{f8f}'),
+    ('࿙', '࿚'),
+    ('\u{135d}', '\u{135e}'),
+    ('ᯀ', '᯳'),
+    ('᯼', '᯿'),
+    ('\u{1dfc}', '\u{1dfc}'),
+    ('ₕ', 'ₜ'),
+    ('₹', '₹'),
+    ('⏩', '⏳'),
+    ('⛎', '⛎'),
+    ('⛢', '⛢'),
+    ('⛤', '⛧'),
+    ('✅', '✅'),
+    ('✊', '✋'),
+    ('✨', '✨'),
+    ('❌', '❌'),
+    ('❎', '❎'),
+    ('❓', '❕'),
+    ('❟', '❠'),
+    ('➕', '➗'),
+    ('➰', '➰'),
+    ('➿', '➿'),
+    ('⟎', '⟏'),
+    ('⵰', '⵰'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('ㆸ', 'ㆺ'),
+    ('Ꙡ', 'ꙡ'),
+    ('Ɥ', 'ꞎ'),
+    ('Ꞑ', 'ꞑ'),
+    ('Ꞡ', 'ꞩ'),
+    ('ꟺ', 'ꟺ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('﮲', '﯁'),
+    ('𑀀', '𑁍'),
+    ('𑁒', '𑁯'),
+    ('𖠀', '𖨸'),
+    ('𛀀', '𛀁'),
+    ('🂠', '🂮'),
+    ('🂱', '🂾'),
+    ('🃁', '🃏'),
+    ('🃑', '🃟'),
+    ('🄰', '🄰'),
+    ('🄲', '🄼'),
+    ('🄾', '🄾'),
+    ('🅀', '🅁'),
+    ('🅃', '🅅'),
+    ('🅇', '🅉'),
+    ('🅏', '🅖'),
+    ('🅘', '🅞'),
+    ('🅠', '🅩'),
+    ('🅰', '🅸'),
+    ('🅺', '🅺'),
+    ('🅽', '🅾'),
+    ('🆀', '🆉'),
+    ('🆎', '🆏'),
+    ('🆑', '🆚'),
+    ('🇦', '🇿'),
+    ('🈁', '🈂'),
+    ('🈲', '🈺'),
+    ('🉐', '🉑'),
+    ('🌀', '🌠'),
+    ('🌰', '🌵'),
+    ('🌷', '🍼'),
+    ('🎀', '🎓'),
+    ('🎠', '🏄'),
+    ('🏆', '🏊'),
+    ('🏠', '🏰'),
+    ('🐀', '🐾'),
+    ('👀', '👀'),
+    ('👂', '📷'),
+    ('📹', '📼'),
+    ('🔀', '🔽'),
+    ('🕐', '🕧'),
+    ('🗻', '🗿'),
+    ('😁', '😐'),
+    ('😒', '😔'),
+    ('😖', '😖'),
+    ('😘', '😘'),
+    ('😚', '😚'),
+    ('😜', '😞'),
+    ('😠', '😥'),
+    ('😨', '😫'),
+    ('😭', '😭'),
+    ('😰', '😳'),
+    ('😵', '🙀'),
+    ('🙅', '🙏'),
+    ('🚀', '🛅'),
+    ('🜀', '🝳'),
+    ('𫝀', '𫠝'),
+];
+
+pub const V6_1: &'static [(char, char)] = &[
+    ('֏', '֏'),
+    ('\u{604}', '\u{604}'),
+    ('ࢠ', 'ࢠ'),
+    ('ࢢ', 'ࢬ'),
+    ('\u{8e4}', '\u{8fe}'),
+    ('૰', '૰'),
+    ('ໞ', 'ໟ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ჽ', 'ჿ'),
+    ('\u{1bab}', '\u{1bad}'),
+    ('ᮺ', 'ᮿ'),
+    ('᳀', '᳇'),
+    ('ᳳ', 'ᳶ'),
+    ('⟋', '⟋'),
+    ('⟍', '⟍'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⵦ', 'ⵧ'),
+    ('⸲', '⸻'),
+    ('鿌', '鿌'),
+    ('\u{a674}', '\u{a67b}'),
+    ('\u{a69f}', '\u{a69f}'),
+    ('Ꞓ', 'ꞓ'),
+    ('Ɦ', 'Ɦ'),
+    ('ꟸ', 'ꟹ'),
+    ('ꫠ', '\u{aaf6}'),
+    ('郞', '隷'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𑃐', '𑃨'),
+    ('𑃰', '𑃹'),
+    ('\u{11100}', '\u{11134}'),
+    ('𑄶', '𑅃'),
+    ('\u{11180}', '𑇈'),
+    ('𑇐', '𑇙'),
+    ('𑚀', '\u{116b7}'),
+    ('𑛀', '𑛉'),
+    ('𖼀', '𖽄'),
+    ('𖽐', '𖽾'),
+    ('\u{16f8f}', '𖾟'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𞻰', '𞻱'),
+    ('🅪', '🅫'),
+    ('🕀', '🕃'),
+    ('😀', '😀'),
+    ('😑', '😑'),
+    ('😕', '😕'),
+    ('😗', '😗'),
+    ('😙', '😙'),
+    ('😛', '😛'),
+    ('😟', '😟'),
+    ('😦', '😧'),
+    ('😬', '😬'),
+    ('😮', '😯'),
+    ('😴', '😴'),
+];
+
+pub const V6_2: &'static [(char, char)] = &[('₺', '₺')];
+
+pub const V6_3: &'static [(char, char)] =
+    &[('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}')];
+
+pub const V7_0: &'static [(char, char)] = &[
+    ('Ϳ', 'Ϳ'),
+    ('Ԩ', 'ԯ'),
+    ('֍', '֎'),
+    ('\u{605}', '\u{605}'),
+    ('ࢡ', 'ࢡ'),
+    ('ࢭ', 'ࢲ'),
+    ('\u{8ff}', '\u{8ff}'),
+    ('ॸ', 'ॸ'),
+    ('ঀ', 'ঀ'),
+    ('\u{c00}', '\u{c00}'),
+    ('ఴ', 'ఴ'),
+    ('\u{c81}', '\u{c81}'),
+    ('\u{d01}', '\u{d01}'),
+    ('෦', '෯'),
+    ('ᛱ', 'ᛸ'),
+    ('ᤝ', 'ᤞ'),
+    ('\u{1ab0}', '\u{1abe}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{1de7}', '\u{1df5}'),
+    ('₻', '₽'),
+    ('⏴', '⏺'),
+    ('✀', '✀'),
+    ('⭍', '⭏'),
+    ('⭚', '⭳'),
+    ('⭶', '⮕'),
+    ('⮘', '⮹'),
+    ('⮽', '⯈'),
+    ('⯊', '⯑'),
+    ('⸼', '⹂'),
+    ('Ꚙ', 'ꚝ'),
+    ('ꞔ', 'ꞟ'),
+    ('Ɜ', 'Ɬ'),
+    ('Ʞ', 'Ʇ'),
+    ('ꟷ', 'ꟷ'),
+    ('ꧠ', 'ꧾ'),
+    ('\u{aa7c}', 'ꩿ'),
+    ('ꬰ', 'ꭟ'),
+    ('ꭤ', 'ꭥ'),
+    ('\u{fe27}', '\u{fe2d}'),
+    ('𐆋', '𐆌'),
+    ('𐆠', '𐆠'),
+    ('\u{102e0}', '𐋻'),
+    ('𐌟', '𐌟'),
+    ('𐍐', '\u{1037a}'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐕯', '𐕯'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐡠', '𐢞'),
+    ('𐢧', '𐢯'),
+    ('𐪀', '𐪟'),
+    ('𐫀', '\u{10ae6}'),
+    ('𐫫', '𐫶'),
+    ('𐮀', '𐮑'),
+    ('𐮙', '𐮜'),
+    ('𐮩', '𐮯'),
+    ('\u{1107f}', '\u{1107f}'),
+    ('𑅐', '𑅶'),
+    ('𑇍', '𑇍'),
+    ('𑇚', '𑇚'),
+    ('𑇡', '𑇴'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈽'),
+    ('𑊰', '\u{112ea}'),
+    ('𑋰', '𑋹'),
+    ('\u{11301}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133c}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑒀', '𑓇'),
+    ('𑓐', '𑓙'),
+    ('𑖀', '\u{115b5}'),
+    ('𑖸', '𑗉'),
+    ('𑘀', '𑙄'),
+    ('𑙐', '𑙙'),
+    ('𑢠', '𑣲'),
+    ('𑣿', '𑣿'),
+    ('𑫀', '𑫸'),
+    ('𒍯', '𒎘'),
+    ('𒑣', '𒑮'),
+    ('𒑴', '𒑴'),
+    ('𖩀', '𖩞'),
+    ('𖩠', '𖩩'),
+    ('𖩮', '𖩯'),
+    ('𖫐', '𖫭'),
+    ('\u{16af0}', '𖫵'),
+    ('𖬀', '𖭅'),
+    ('𖭐', '𖭙'),
+    ('𖭛', '𖭡'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𛲜', '\u{1bca3}'),
+    ('𞠀', '𞣄'),
+    ('𞣇', '\u{1e8d6}'),
+    ('🂿', '🂿'),
+    ('🃠', '🃵'),
+    ('🄋', '🄌'),
+    ('🌡', '🌬'),
+    ('🌶', '🌶'),
+    ('🍽', '🍽'),
+    ('🎔', '🎟'),
+    ('🏅', '🏅'),
+    ('🏋', '🏎'),
+    ('🏔', '🏟'),
+    ('🏱', '🏷'),
+    ('🐿', '🐿'),
+    ('👁', '👁'),
+    ('📸', '📸'),
+    ('📽', '📾'),
+    ('🔾', '🔿'),
+    ('🕄', '🕊'),
+    ('🕨', '🕹'),
+    ('🕻', '🖣'),
+    ('🖥', '🗺'),
+    ('🙁', '🙂'),
+    ('🙐', '🙿'),
+    ('🛆', '🛏'),
+    ('🛠', '🛬'),
+    ('🛰', '🛳'),
+    ('🞀', '🟔'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+];
+
+pub const V8_0: &'static [(char, char)] = &[
+    ('ࢳ', 'ࢴ'),
+    ('\u{8e3}', '\u{8e3}'),
+    ('ૹ', 'ૹ'),
+    ('ౚ', 'ౚ'),
+    ('ൟ', 'ൟ'),
+    ('Ᏽ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('₾', '₾'),
+    ('↊', '↋'),
+    ('⯬', '⯯'),
+    ('鿍', '鿕'),
+    ('\u{a69e}', '\u{a69e}'),
+    ('ꞏ', 'ꞏ'),
+    ('Ʝ', 'ꞷ'),
+    ('꣼', 'ꣽ'),
+    ('ꭠ', 'ꭣ'),
+    ('ꭰ', 'ꮿ'),
+    ('\u{fe2e}', '\u{fe2f}'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐣻', '𐣿'),
+    ('𐦼', '𐦽'),
+    ('𐧀', '𐧏'),
+    ('𐧒', '𐧿'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐳺', '𐳿'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('𑇛', '𑇟'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊩'),
+    ('\u{11300}', '\u{11300}'),
+    ('𑍐', '𑍐'),
+    ('𑗊', '\u{115dd}'),
+    ('𑜀', '𑜙'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑜰', '𑜿'),
+    ('𒎙', '𒎙'),
+    ('𒒀', '𒕃'),
+    ('𔐀', '𔙆'),
+    ('𝇞', '𝇨'),
+    ('𝠀', '𝪋'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('🌭', '🌯'),
+    ('🍾', '🍿'),
+    ('🏏', '🏓'),
+    ('🏸', '🏿'),
+    ('📿', '📿'),
+    ('🕋', '🕏'),
+    ('🙃', '🙄'),
+    ('🛐', '🛐'),
+    ('🤐', '🤘'),
+    ('🦀', '🦄'),
+    ('🧀', '🧀'),
+    ('𫠠', '𬺡'),
+];
+
+pub const V9_0: &'static [(char, char)] = &[
+    ('ࢶ', 'ࢽ'),
+    ('\u{8d4}', '\u{8e2}'),
+    ('ಀ', 'ಀ'),
+    ('൏', '൏'),
+    ('ൔ', 'ൖ'),
+    ('൘', '൞'),
+    ('൶', '൸'),
+    ('ᲀ', 'ᲈ'),
+    ('\u{1dfb}', '\u{1dfb}'),
+    ('⏻', '⏾'),
+    ('⹃', '⹄'),
+    ('Ɪ', 'Ɪ'),
+    ('\u{a8c5}', '\u{a8c5}'),
+    ('𐆍', '𐆎'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('𑐀', '𑑙'),
+    ('𑑛', '𑑛'),
+    ('𑑝', '𑑝'),
+    ('𑙠', '𑙬'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '\u{11c36}'),
+    ('\u{11c38}', '𑱅'),
+    ('𑱐', '𑱬'),
+    ('𑱰', '𑲏'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('𖿠', '𖿠'),
+    ('𗀀', '𘟬'),
+    ('𘠀', '𘫲'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('𞤀', '\u{1e94a}'),
+    ('𞥐', '𞥙'),
+    ('𞥞', '𞥟'),
+    ('🆛', '🆬'),
+    ('🈻', '🈻'),
+    ('🕺', '🕺'),
+    ('🖤', '🖤'),
+    ('🛑', '🛒'),
+    ('🛴', '🛶'),
+    ('🤙', '🤞'),
+    ('🤠', '🤧'),
+    ('🤰', '🤰'),
+    ('🤳', '🤾'),
+    ('🥀', '🥋'),
+    ('🥐', '🥞'),
+    ('🦅', '🦑'),
+];
diff --git a/src/unicode_tables/case_folding_simple.rs b/src/unicode_tables/case_folding_simple.rs
new file mode 100644
index 0000000..6b4cad3
--- /dev/null
+++ b/src/unicode_tables/case_folding_simple.rs
@@ -0,0 +1,2806 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
+    ('A', &['a']),
+    ('B', &['b']),
+    ('C', &['c']),
+    ('D', &['d']),
+    ('E', &['e']),
+    ('F', &['f']),
+    ('G', &['g']),
+    ('H', &['h']),
+    ('I', &['i']),
+    ('J', &['j']),
+    ('K', &['k', 'K']),
+    ('L', &['l']),
+    ('M', &['m']),
+    ('N', &['n']),
+    ('O', &['o']),
+    ('P', &['p']),
+    ('Q', &['q']),
+    ('R', &['r']),
+    ('S', &['s', 'ſ']),
+    ('T', &['t']),
+    ('U', &['u']),
+    ('V', &['v']),
+    ('W', &['w']),
+    ('X', &['x']),
+    ('Y', &['y']),
+    ('Z', &['z']),
+    ('a', &['A']),
+    ('b', &['B']),
+    ('c', &['C']),
+    ('d', &['D']),
+    ('e', &['E']),
+    ('f', &['F']),
+    ('g', &['G']),
+    ('h', &['H']),
+    ('i', &['I']),
+    ('j', &['J']),
+    ('k', &['K', 'K']),
+    ('l', &['L']),
+    ('m', &['M']),
+    ('n', &['N']),
+    ('o', &['O']),
+    ('p', &['P']),
+    ('q', &['Q']),
+    ('r', &['R']),
+    ('s', &['S', 'ſ']),
+    ('t', &['T']),
+    ('u', &['U']),
+    ('v', &['V']),
+    ('w', &['W']),
+    ('x', &['X']),
+    ('y', &['Y']),
+    ('z', &['Z']),
+    ('µ', &['Μ', 'μ']),
+    ('À', &['à']),
+    ('Á', &['á']),
+    ('Â', &['â']),
+    ('Ã', &['ã']),
+    ('Ä', &['ä']),
+    ('Å', &['å', 'Å']),
+    ('Æ', &['æ']),
+    ('Ç', &['ç']),
+    ('È', &['è']),
+    ('É', &['é']),
+    ('Ê', &['ê']),
+    ('Ë', &['ë']),
+    ('Ì', &['ì']),
+    ('Í', &['í']),
+    ('Î', &['î']),
+    ('Ï', &['ï']),
+    ('Ð', &['ð']),
+    ('Ñ', &['ñ']),
+    ('Ò', &['ò']),
+    ('Ó', &['ó']),
+    ('Ô', &['ô']),
+    ('Õ', &['õ']),
+    ('Ö', &['ö']),
+    ('Ø', &['ø']),
+    ('Ù', &['ù']),
+    ('Ú', &['ú']),
+    ('Û', &['û']),
+    ('Ü', &['ü']),
+    ('Ý', &['ý']),
+    ('Þ', &['þ']),
+    ('ß', &['ẞ']),
+    ('à', &['À']),
+    ('á', &['Á']),
+    ('â', &['Â']),
+    ('ã', &['Ã']),
+    ('ä', &['Ä']),
+    ('å', &['Å', 'Å']),
+    ('æ', &['Æ']),
+    ('ç', &['Ç']),
+    ('è', &['È']),
+    ('é', &['É']),
+    ('ê', &['Ê']),
+    ('ë', &['Ë']),
+    ('ì', &['Ì']),
+    ('í', &['Í']),
+    ('î', &['Î']),
+    ('ï', &['Ï']),
+    ('ð', &['Ð']),
+    ('ñ', &['Ñ']),
+    ('ò', &['Ò']),
+    ('ó', &['Ó']),
+    ('ô', &['Ô']),
+    ('õ', &['Õ']),
+    ('ö', &['Ö']),
+    ('ø', &['Ø']),
+    ('ù', &['Ù']),
+    ('ú', &['Ú']),
+    ('û', &['Û']),
+    ('ü', &['Ü']),
+    ('ý', &['Ý']),
+    ('þ', &['Þ']),
+    ('ÿ', &['Ÿ']),
+    ('Ā', &['ā']),
+    ('ā', &['Ā']),
+    ('Ă', &['ă']),
+    ('ă', &['Ă']),
+    ('Ą', &['ą']),
+    ('ą', &['Ą']),
+    ('Ć', &['ć']),
+    ('ć', &['Ć']),
+    ('Ĉ', &['ĉ']),
+    ('ĉ', &['Ĉ']),
+    ('Ċ', &['ċ']),
+    ('ċ', &['Ċ']),
+    ('Č', &['č']),
+    ('č', &['Č']),
+    ('Ď', &['ď']),
+    ('ď', &['Ď']),
+    ('Đ', &['đ']),
+    ('đ', &['Đ']),
+    ('Ē', &['ē']),
+    ('ē', &['Ē']),
+    ('Ĕ', &['ĕ']),
+    ('ĕ', &['Ĕ']),
+    ('Ė', &['ė']),
+    ('ė', &['Ė']),
+    ('Ę', &['ę']),
+    ('ę', &['Ę']),
+    ('Ě', &['ě']),
+    ('ě', &['Ě']),
+    ('Ĝ', &['ĝ']),
+    ('ĝ', &['Ĝ']),
+    ('Ğ', &['ğ']),
+    ('ğ', &['Ğ']),
+    ('Ġ', &['ġ']),
+    ('ġ', &['Ġ']),
+    ('Ģ', &['ģ']),
+    ('ģ', &['Ģ']),
+    ('Ĥ', &['ĥ']),
+    ('ĥ', &['Ĥ']),
+    ('Ħ', &['ħ']),
+    ('ħ', &['Ħ']),
+    ('Ĩ', &['ĩ']),
+    ('ĩ', &['Ĩ']),
+    ('Ī', &['ī']),
+    ('ī', &['Ī']),
+    ('Ĭ', &['ĭ']),
+    ('ĭ', &['Ĭ']),
+    ('Į', &['į']),
+    ('į', &['Į']),
+    ('IJ', &['ij']),
+    ('ij', &['IJ']),
+    ('Ĵ', &['ĵ']),
+    ('ĵ', &['Ĵ']),
+    ('Ķ', &['ķ']),
+    ('ķ', &['Ķ']),
+    ('Ĺ', &['ĺ']),
+    ('ĺ', &['Ĺ']),
+    ('Ļ', &['ļ']),
+    ('ļ', &['Ļ']),
+    ('Ľ', &['ľ']),
+    ('ľ', &['Ľ']),
+    ('Ŀ', &['ŀ']),
+    ('ŀ', &['Ŀ']),
+    ('Ł', &['ł']),
+    ('ł', &['Ł']),
+    ('Ń', &['ń']),
+    ('ń', &['Ń']),
+    ('Ņ', &['ņ']),
+    ('ņ', &['Ņ']),
+    ('Ň', &['ň']),
+    ('ň', &['Ň']),
+    ('Ŋ', &['ŋ']),
+    ('ŋ', &['Ŋ']),
+    ('Ō', &['ō']),
+    ('ō', &['Ō']),
+    ('Ŏ', &['ŏ']),
+    ('ŏ', &['Ŏ']),
+    ('Ő', &['ő']),
+    ('ő', &['Ő']),
+    ('Œ', &['œ']),
+    ('œ', &['Œ']),
+    ('Ŕ', &['ŕ']),
+    ('ŕ', &['Ŕ']),
+    ('Ŗ', &['ŗ']),
+    ('ŗ', &['Ŗ']),
+    ('Ř', &['ř']),
+    ('ř', &['Ř']),
+    ('Ś', &['ś']),
+    ('ś', &['Ś']),
+    ('Ŝ', &['ŝ']),
+    ('ŝ', &['Ŝ']),
+    ('Ş', &['ş']),
+    ('ş', &['Ş']),
+    ('Š', &['š']),
+    ('š', &['Š']),
+    ('Ţ', &['ţ']),
+    ('ţ', &['Ţ']),
+    ('Ť', &['ť']),
+    ('ť', &['Ť']),
+    ('Ŧ', &['ŧ']),
+    ('ŧ', &['Ŧ']),
+    ('Ũ', &['ũ']),
+    ('ũ', &['Ũ']),
+    ('Ū', &['ū']),
+    ('ū', &['Ū']),
+    ('Ŭ', &['ŭ']),
+    ('ŭ', &['Ŭ']),
+    ('Ů', &['ů']),
+    ('ů', &['Ů']),
+    ('Ű', &['ű']),
+    ('ű', &['Ű']),
+    ('Ų', &['ų']),
+    ('ų', &['Ų']),
+    ('Ŵ', &['ŵ']),
+    ('ŵ', &['Ŵ']),
+    ('Ŷ', &['ŷ']),
+    ('ŷ', &['Ŷ']),
+    ('Ÿ', &['ÿ']),
+    ('Ź', &['ź']),
+    ('ź', &['Ź']),
+    ('Ż', &['ż']),
+    ('ż', &['Ż']),
+    ('Ž', &['ž']),
+    ('ž', &['Ž']),
+    ('ſ', &['S', 's']),
+    ('ƀ', &['Ƀ']),
+    ('Ɓ', &['ɓ']),
+    ('Ƃ', &['ƃ']),
+    ('ƃ', &['Ƃ']),
+    ('Ƅ', &['ƅ']),
+    ('ƅ', &['Ƅ']),
+    ('Ɔ', &['ɔ']),
+    ('Ƈ', &['ƈ']),
+    ('ƈ', &['Ƈ']),
+    ('Ɖ', &['ɖ']),
+    ('Ɗ', &['ɗ']),
+    ('Ƌ', &['ƌ']),
+    ('ƌ', &['Ƌ']),
+    ('Ǝ', &['ǝ']),
+    ('Ə', &['ə']),
+    ('Ɛ', &['ɛ']),
+    ('Ƒ', &['ƒ']),
+    ('ƒ', &['Ƒ']),
+    ('Ɠ', &['ɠ']),
+    ('Ɣ', &['ɣ']),
+    ('ƕ', &['Ƕ']),
+    ('Ɩ', &['ɩ']),
+    ('Ɨ', &['ɨ']),
+    ('Ƙ', &['ƙ']),
+    ('ƙ', &['Ƙ']),
+    ('ƚ', &['Ƚ']),
+    ('Ɯ', &['ɯ']),
+    ('Ɲ', &['ɲ']),
+    ('ƞ', &['Ƞ']),
+    ('Ɵ', &['ɵ']),
+    ('Ơ', &['ơ']),
+    ('ơ', &['Ơ']),
+    ('Ƣ', &['ƣ']),
+    ('ƣ', &['Ƣ']),
+    ('Ƥ', &['ƥ']),
+    ('ƥ', &['Ƥ']),
+    ('Ʀ', &['ʀ']),
+    ('Ƨ', &['ƨ']),
+    ('ƨ', &['Ƨ']),
+    ('Ʃ', &['ʃ']),
+    ('Ƭ', &['ƭ']),
+    ('ƭ', &['Ƭ']),
+    ('Ʈ', &['ʈ']),
+    ('Ư', &['ư']),
+    ('ư', &['Ư']),
+    ('Ʊ', &['ʊ']),
+    ('Ʋ', &['ʋ']),
+    ('Ƴ', &['ƴ']),
+    ('ƴ', &['Ƴ']),
+    ('Ƶ', &['ƶ']),
+    ('ƶ', &['Ƶ']),
+    ('Ʒ', &['ʒ']),
+    ('Ƹ', &['ƹ']),
+    ('ƹ', &['Ƹ']),
+    ('Ƽ', &['ƽ']),
+    ('ƽ', &['Ƽ']),
+    ('ƿ', &['Ƿ']),
+    ('DŽ', &['Dž', 'dž']),
+    ('Dž', &['DŽ', 'dž']),
+    ('dž', &['DŽ', 'Dž']),
+    ('LJ', &['Lj', 'lj']),
+    ('Lj', &['LJ', 'lj']),
+    ('lj', &['LJ', 'Lj']),
+    ('NJ', &['Nj', 'nj']),
+    ('Nj', &['NJ', 'nj']),
+    ('nj', &['NJ', 'Nj']),
+    ('Ǎ', &['ǎ']),
+    ('ǎ', &['Ǎ']),
+    ('Ǐ', &['ǐ']),
+    ('ǐ', &['Ǐ']),
+    ('Ǒ', &['ǒ']),
+    ('ǒ', &['Ǒ']),
+    ('Ǔ', &['ǔ']),
+    ('ǔ', &['Ǔ']),
+    ('Ǖ', &['ǖ']),
+    ('ǖ', &['Ǖ']),
+    ('Ǘ', &['ǘ']),
+    ('ǘ', &['Ǘ']),
+    ('Ǚ', &['ǚ']),
+    ('ǚ', &['Ǚ']),
+    ('Ǜ', &['ǜ']),
+    ('ǜ', &['Ǜ']),
+    ('ǝ', &['Ǝ']),
+    ('Ǟ', &['ǟ']),
+    ('ǟ', &['Ǟ']),
+    ('Ǡ', &['ǡ']),
+    ('ǡ', &['Ǡ']),
+    ('Ǣ', &['ǣ']),
+    ('ǣ', &['Ǣ']),
+    ('Ǥ', &['ǥ']),
+    ('ǥ', &['Ǥ']),
+    ('Ǧ', &['ǧ']),
+    ('ǧ', &['Ǧ']),
+    ('Ǩ', &['ǩ']),
+    ('ǩ', &['Ǩ']),
+    ('Ǫ', &['ǫ']),
+    ('ǫ', &['Ǫ']),
+    ('Ǭ', &['ǭ']),
+    ('ǭ', &['Ǭ']),
+    ('Ǯ', &['ǯ']),
+    ('ǯ', &['Ǯ']),
+    ('DZ', &['Dz', 'dz']),
+    ('Dz', &['DZ', 'dz']),
+    ('dz', &['DZ', 'Dz']),
+    ('Ǵ', &['ǵ']),
+    ('ǵ', &['Ǵ']),
+    ('Ƕ', &['ƕ']),
+    ('Ƿ', &['ƿ']),
+    ('Ǹ', &['ǹ']),
+    ('ǹ', &['Ǹ']),
+    ('Ǻ', &['ǻ']),
+    ('ǻ', &['Ǻ']),
+    ('Ǽ', &['ǽ']),
+    ('ǽ', &['Ǽ']),
+    ('Ǿ', &['ǿ']),
+    ('ǿ', &['Ǿ']),
+    ('Ȁ', &['ȁ']),
+    ('ȁ', &['Ȁ']),
+    ('Ȃ', &['ȃ']),
+    ('ȃ', &['Ȃ']),
+    ('Ȅ', &['ȅ']),
+    ('ȅ', &['Ȅ']),
+    ('Ȇ', &['ȇ']),
+    ('ȇ', &['Ȇ']),
+    ('Ȉ', &['ȉ']),
+    ('ȉ', &['Ȉ']),
+    ('Ȋ', &['ȋ']),
+    ('ȋ', &['Ȋ']),
+    ('Ȍ', &['ȍ']),
+    ('ȍ', &['Ȍ']),
+    ('Ȏ', &['ȏ']),
+    ('ȏ', &['Ȏ']),
+    ('Ȑ', &['ȑ']),
+    ('ȑ', &['Ȑ']),
+    ('Ȓ', &['ȓ']),
+    ('ȓ', &['Ȓ']),
+    ('Ȕ', &['ȕ']),
+    ('ȕ', &['Ȕ']),
+    ('Ȗ', &['ȗ']),
+    ('ȗ', &['Ȗ']),
+    ('Ș', &['ș']),
+    ('ș', &['Ș']),
+    ('Ț', &['ț']),
+    ('ț', &['Ț']),
+    ('Ȝ', &['ȝ']),
+    ('ȝ', &['Ȝ']),
+    ('Ȟ', &['ȟ']),
+    ('ȟ', &['Ȟ']),
+    ('Ƞ', &['ƞ']),
+    ('Ȣ', &['ȣ']),
+    ('ȣ', &['Ȣ']),
+    ('Ȥ', &['ȥ']),
+    ('ȥ', &['Ȥ']),
+    ('Ȧ', &['ȧ']),
+    ('ȧ', &['Ȧ']),
+    ('Ȩ', &['ȩ']),
+    ('ȩ', &['Ȩ']),
+    ('Ȫ', &['ȫ']),
+    ('ȫ', &['Ȫ']),
+    ('Ȭ', &['ȭ']),
+    ('ȭ', &['Ȭ']),
+    ('Ȯ', &['ȯ']),
+    ('ȯ', &['Ȯ']),
+    ('Ȱ', &['ȱ']),
+    ('ȱ', &['Ȱ']),
+    ('Ȳ', &['ȳ']),
+    ('ȳ', &['Ȳ']),
+    ('Ⱥ', &['ⱥ']),
+    ('Ȼ', &['ȼ']),
+    ('ȼ', &['Ȼ']),
+    ('Ƚ', &['ƚ']),
+    ('Ⱦ', &['ⱦ']),
+    ('ȿ', &['Ȿ']),
+    ('ɀ', &['Ɀ']),
+    ('Ɂ', &['ɂ']),
+    ('ɂ', &['Ɂ']),
+    ('Ƀ', &['ƀ']),
+    ('Ʉ', &['ʉ']),
+    ('Ʌ', &['ʌ']),
+    ('Ɇ', &['ɇ']),
+    ('ɇ', &['Ɇ']),
+    ('Ɉ', &['ɉ']),
+    ('ɉ', &['Ɉ']),
+    ('Ɋ', &['ɋ']),
+    ('ɋ', &['Ɋ']),
+    ('Ɍ', &['ɍ']),
+    ('ɍ', &['Ɍ']),
+    ('Ɏ', &['ɏ']),
+    ('ɏ', &['Ɏ']),
+    ('ɐ', &['Ɐ']),
+    ('ɑ', &['Ɑ']),
+    ('ɒ', &['Ɒ']),
+    ('ɓ', &['Ɓ']),
+    ('ɔ', &['Ɔ']),
+    ('ɖ', &['Ɖ']),
+    ('ɗ', &['Ɗ']),
+    ('ə', &['Ə']),
+    ('ɛ', &['Ɛ']),
+    ('ɜ', &['Ɜ']),
+    ('ɠ', &['Ɠ']),
+    ('ɡ', &['Ɡ']),
+    ('ɣ', &['Ɣ']),
+    ('ɥ', &['Ɥ']),
+    ('ɦ', &['Ɦ']),
+    ('ɨ', &['Ɨ']),
+    ('ɩ', &['Ɩ']),
+    ('ɪ', &['Ɪ']),
+    ('ɫ', &['Ɫ']),
+    ('ɬ', &['Ɬ']),
+    ('ɯ', &['Ɯ']),
+    ('ɱ', &['Ɱ']),
+    ('ɲ', &['Ɲ']),
+    ('ɵ', &['Ɵ']),
+    ('ɽ', &['Ɽ']),
+    ('ʀ', &['Ʀ']),
+    ('ʂ', &['Ʂ']),
+    ('ʃ', &['Ʃ']),
+    ('ʇ', &['Ʇ']),
+    ('ʈ', &['Ʈ']),
+    ('ʉ', &['Ʉ']),
+    ('ʊ', &['Ʊ']),
+    ('ʋ', &['Ʋ']),
+    ('ʌ', &['Ʌ']),
+    ('ʒ', &['Ʒ']),
+    ('ʝ', &['Ʝ']),
+    ('ʞ', &['Ʞ']),
+    ('\u{345}', &['Ι', 'ι', 'ι']),
+    ('Ͱ', &['ͱ']),
+    ('ͱ', &['Ͱ']),
+    ('Ͳ', &['ͳ']),
+    ('ͳ', &['Ͳ']),
+    ('Ͷ', &['ͷ']),
+    ('ͷ', &['Ͷ']),
+    ('ͻ', &['Ͻ']),
+    ('ͼ', &['Ͼ']),
+    ('ͽ', &['Ͽ']),
+    ('Ϳ', &['ϳ']),
+    ('Ά', &['ά']),
+    ('Έ', &['έ']),
+    ('Ή', &['ή']),
+    ('Ί', &['ί']),
+    ('Ό', &['ό']),
+    ('Ύ', &['ύ']),
+    ('Ώ', &['ώ']),
+    ('Α', &['α']),
+    ('Β', &['β', 'ϐ']),
+    ('Γ', &['γ']),
+    ('Δ', &['δ']),
+    ('Ε', &['ε', 'ϵ']),
+    ('Ζ', &['ζ']),
+    ('Η', &['η']),
+    ('Θ', &['θ', 'ϑ', 'ϴ']),
+    ('Ι', &['\u{345}', 'ι', 'ι']),
+    ('Κ', &['κ', 'ϰ']),
+    ('Λ', &['λ']),
+    ('Μ', &['µ', 'μ']),
+    ('Ν', &['ν']),
+    ('Ξ', &['ξ']),
+    ('Ο', &['ο']),
+    ('Π', &['π', 'ϖ']),
+    ('Ρ', &['ρ', 'ϱ']),
+    ('Σ', &['ς', 'σ']),
+    ('Τ', &['τ']),
+    ('Υ', &['υ']),
+    ('Φ', &['φ', 'ϕ']),
+    ('Χ', &['χ']),
+    ('Ψ', &['ψ']),
+    ('Ω', &['ω', 'Ω']),
+    ('Ϊ', &['ϊ']),
+    ('Ϋ', &['ϋ']),
+    ('ά', &['Ά']),
+    ('έ', &['Έ']),
+    ('ή', &['Ή']),
+    ('ί', &['Ί']),
+    ('α', &['Α']),
+    ('β', &['Β', 'ϐ']),
+    ('γ', &['Γ']),
+    ('δ', &['Δ']),
+    ('ε', &['Ε', 'ϵ']),
+    ('ζ', &['Ζ']),
+    ('η', &['Η']),
+    ('θ', &['Θ', 'ϑ', 'ϴ']),
+    ('ι', &['\u{345}', 'Ι', 'ι']),
+    ('κ', &['Κ', 'ϰ']),
+    ('λ', &['Λ']),
+    ('μ', &['µ', 'Μ']),
+    ('ν', &['Ν']),
+    ('ξ', &['Ξ']),
+    ('ο', &['Ο']),
+    ('π', &['Π', 'ϖ']),
+    ('ρ', &['Ρ', 'ϱ']),
+    ('ς', &['Σ', 'σ']),
+    ('σ', &['Σ', 'ς']),
+    ('τ', &['Τ']),
+    ('υ', &['Υ']),
+    ('φ', &['Φ', 'ϕ']),
+    ('χ', &['Χ']),
+    ('ψ', &['Ψ']),
+    ('ω', &['Ω', 'Ω']),
+    ('ϊ', &['Ϊ']),
+    ('ϋ', &['Ϋ']),
+    ('ό', &['Ό']),
+    ('ύ', &['Ύ']),
+    ('ώ', &['Ώ']),
+    ('Ϗ', &['ϗ']),
+    ('ϐ', &['Β', 'β']),
+    ('ϑ', &['Θ', 'θ', 'ϴ']),
+    ('ϕ', &['Φ', 'φ']),
+    ('ϖ', &['Π', 'π']),
+    ('ϗ', &['Ϗ']),
+    ('Ϙ', &['ϙ']),
+    ('ϙ', &['Ϙ']),
+    ('Ϛ', &['ϛ']),
+    ('ϛ', &['Ϛ']),
+    ('Ϝ', &['ϝ']),
+    ('ϝ', &['Ϝ']),
+    ('Ϟ', &['ϟ']),
+    ('ϟ', &['Ϟ']),
+    ('Ϡ', &['ϡ']),
+    ('ϡ', &['Ϡ']),
+    ('Ϣ', &['ϣ']),
+    ('ϣ', &['Ϣ']),
+    ('Ϥ', &['ϥ']),
+    ('ϥ', &['Ϥ']),
+    ('Ϧ', &['ϧ']),
+    ('ϧ', &['Ϧ']),
+    ('Ϩ', &['ϩ']),
+    ('ϩ', &['Ϩ']),
+    ('Ϫ', &['ϫ']),
+    ('ϫ', &['Ϫ']),
+    ('Ϭ', &['ϭ']),
+    ('ϭ', &['Ϭ']),
+    ('Ϯ', &['ϯ']),
+    ('ϯ', &['Ϯ']),
+    ('ϰ', &['Κ', 'κ']),
+    ('ϱ', &['Ρ', 'ρ']),
+    ('ϲ', &['Ϲ']),
+    ('ϳ', &['Ϳ']),
+    ('ϴ', &['Θ', 'θ', 'ϑ']),
+    ('ϵ', &['Ε', 'ε']),
+    ('Ϸ', &['ϸ']),
+    ('ϸ', &['Ϸ']),
+    ('Ϲ', &['ϲ']),
+    ('Ϻ', &['ϻ']),
+    ('ϻ', &['Ϻ']),
+    ('Ͻ', &['ͻ']),
+    ('Ͼ', &['ͼ']),
+    ('Ͽ', &['ͽ']),
+    ('Ѐ', &['ѐ']),
+    ('Ё', &['ё']),
+    ('Ђ', &['ђ']),
+    ('Ѓ', &['ѓ']),
+    ('Є', &['є']),
+    ('Ѕ', &['ѕ']),
+    ('І', &['і']),
+    ('Ї', &['ї']),
+    ('Ј', &['ј']),
+    ('Љ', &['љ']),
+    ('Њ', &['њ']),
+    ('Ћ', &['ћ']),
+    ('Ќ', &['ќ']),
+    ('Ѝ', &['ѝ']),
+    ('Ў', &['ў']),
+    ('Џ', &['џ']),
+    ('А', &['а']),
+    ('Б', &['б']),
+    ('В', &['в', 'ᲀ']),
+    ('Г', &['г']),
+    ('Д', &['д', 'ᲁ']),
+    ('Е', &['е']),
+    ('Ж', &['ж']),
+    ('З', &['з']),
+    ('И', &['и']),
+    ('Й', &['й']),
+    ('К', &['к']),
+    ('Л', &['л']),
+    ('М', &['м']),
+    ('Н', &['н']),
+    ('О', &['о', 'ᲂ']),
+    ('П', &['п']),
+    ('Р', &['р']),
+    ('С', &['с', 'ᲃ']),
+    ('Т', &['т', 'ᲄ', 'ᲅ']),
+    ('У', &['у']),
+    ('Ф', &['ф']),
+    ('Х', &['х']),
+    ('Ц', &['ц']),
+    ('Ч', &['ч']),
+    ('Ш', &['ш']),
+    ('Щ', &['щ']),
+    ('Ъ', &['ъ', 'ᲆ']),
+    ('Ы', &['ы']),
+    ('Ь', &['ь']),
+    ('Э', &['э']),
+    ('Ю', &['ю']),
+    ('Я', &['я']),
+    ('а', &['А']),
+    ('б', &['Б']),
+    ('в', &['В', 'ᲀ']),
+    ('г', &['Г']),
+    ('д', &['Д', 'ᲁ']),
+    ('е', &['Е']),
+    ('ж', &['Ж']),
+    ('з', &['З']),
+    ('и', &['И']),
+    ('й', &['Й']),
+    ('к', &['К']),
+    ('л', &['Л']),
+    ('м', &['М']),
+    ('н', &['Н']),
+    ('о', &['О', 'ᲂ']),
+    ('п', &['П']),
+    ('р', &['Р']),
+    ('с', &['С', 'ᲃ']),
+    ('т', &['Т', 'ᲄ', 'ᲅ']),
+    ('у', &['У']),
+    ('ф', &['Ф']),
+    ('х', &['Х']),
+    ('ц', &['Ц']),
+    ('ч', &['Ч']),
+    ('ш', &['Ш']),
+    ('щ', &['Щ']),
+    ('ъ', &['Ъ', 'ᲆ']),
+    ('ы', &['Ы']),
+    ('ь', &['Ь']),
+    ('э', &['Э']),
+    ('ю', &['Ю']),
+    ('я', &['Я']),
+    ('ѐ', &['Ѐ']),
+    ('ё', &['Ё']),
+    ('ђ', &['Ђ']),
+    ('ѓ', &['Ѓ']),
+    ('є', &['Є']),
+    ('ѕ', &['Ѕ']),
+    ('і', &['І']),
+    ('ї', &['Ї']),
+    ('ј', &['Ј']),
+    ('љ', &['Љ']),
+    ('њ', &['Њ']),
+    ('ћ', &['Ћ']),
+    ('ќ', &['Ќ']),
+    ('ѝ', &['Ѝ']),
+    ('ў', &['Ў']),
+    ('џ', &['Џ']),
+    ('Ѡ', &['ѡ']),
+    ('ѡ', &['Ѡ']),
+    ('Ѣ', &['ѣ', 'ᲇ']),
+    ('ѣ', &['Ѣ', 'ᲇ']),
+    ('Ѥ', &['ѥ']),
+    ('ѥ', &['Ѥ']),
+    ('Ѧ', &['ѧ']),
+    ('ѧ', &['Ѧ']),
+    ('Ѩ', &['ѩ']),
+    ('ѩ', &['Ѩ']),
+    ('Ѫ', &['ѫ']),
+    ('ѫ', &['Ѫ']),
+    ('Ѭ', &['ѭ']),
+    ('ѭ', &['Ѭ']),
+    ('Ѯ', &['ѯ']),
+    ('ѯ', &['Ѯ']),
+    ('Ѱ', &['ѱ']),
+    ('ѱ', &['Ѱ']),
+    ('Ѳ', &['ѳ']),
+    ('ѳ', &['Ѳ']),
+    ('Ѵ', &['ѵ']),
+    ('ѵ', &['Ѵ']),
+    ('Ѷ', &['ѷ']),
+    ('ѷ', &['Ѷ']),
+    ('Ѹ', &['ѹ']),
+    ('ѹ', &['Ѹ']),
+    ('Ѻ', &['ѻ']),
+    ('ѻ', &['Ѻ']),
+    ('Ѽ', &['ѽ']),
+    ('ѽ', &['Ѽ']),
+    ('Ѿ', &['ѿ']),
+    ('ѿ', &['Ѿ']),
+    ('Ҁ', &['ҁ']),
+    ('ҁ', &['Ҁ']),
+    ('Ҋ', &['ҋ']),
+    ('ҋ', &['Ҋ']),
+    ('Ҍ', &['ҍ']),
+    ('ҍ', &['Ҍ']),
+    ('Ҏ', &['ҏ']),
+    ('ҏ', &['Ҏ']),
+    ('Ґ', &['ґ']),
+    ('ґ', &['Ґ']),
+    ('Ғ', &['ғ']),
+    ('ғ', &['Ғ']),
+    ('Ҕ', &['ҕ']),
+    ('ҕ', &['Ҕ']),
+    ('Җ', &['җ']),
+    ('җ', &['Җ']),
+    ('Ҙ', &['ҙ']),
+    ('ҙ', &['Ҙ']),
+    ('Қ', &['қ']),
+    ('қ', &['Қ']),
+    ('Ҝ', &['ҝ']),
+    ('ҝ', &['Ҝ']),
+    ('Ҟ', &['ҟ']),
+    ('ҟ', &['Ҟ']),
+    ('Ҡ', &['ҡ']),
+    ('ҡ', &['Ҡ']),
+    ('Ң', &['ң']),
+    ('ң', &['Ң']),
+    ('Ҥ', &['ҥ']),
+    ('ҥ', &['Ҥ']),
+    ('Ҧ', &['ҧ']),
+    ('ҧ', &['Ҧ']),
+    ('Ҩ', &['ҩ']),
+    ('ҩ', &['Ҩ']),
+    ('Ҫ', &['ҫ']),
+    ('ҫ', &['Ҫ']),
+    ('Ҭ', &['ҭ']),
+    ('ҭ', &['Ҭ']),
+    ('Ү', &['ү']),
+    ('ү', &['Ү']),
+    ('Ұ', &['ұ']),
+    ('ұ', &['Ұ']),
+    ('Ҳ', &['ҳ']),
+    ('ҳ', &['Ҳ']),
+    ('Ҵ', &['ҵ']),
+    ('ҵ', &['Ҵ']),
+    ('Ҷ', &['ҷ']),
+    ('ҷ', &['Ҷ']),
+    ('Ҹ', &['ҹ']),
+    ('ҹ', &['Ҹ']),
+    ('Һ', &['һ']),
+    ('һ', &['Һ']),
+    ('Ҽ', &['ҽ']),
+    ('ҽ', &['Ҽ']),
+    ('Ҿ', &['ҿ']),
+    ('ҿ', &['Ҿ']),
+    ('Ӏ', &['ӏ']),
+    ('Ӂ', &['ӂ']),
+    ('ӂ', &['Ӂ']),
+    ('Ӄ', &['ӄ']),
+    ('ӄ', &['Ӄ']),
+    ('Ӆ', &['ӆ']),
+    ('ӆ', &['Ӆ']),
+    ('Ӈ', &['ӈ']),
+    ('ӈ', &['Ӈ']),
+    ('Ӊ', &['ӊ']),
+    ('ӊ', &['Ӊ']),
+    ('Ӌ', &['ӌ']),
+    ('ӌ', &['Ӌ']),
+    ('Ӎ', &['ӎ']),
+    ('ӎ', &['Ӎ']),
+    ('ӏ', &['Ӏ']),
+    ('Ӑ', &['ӑ']),
+    ('ӑ', &['Ӑ']),
+    ('Ӓ', &['ӓ']),
+    ('ӓ', &['Ӓ']),
+    ('Ӕ', &['ӕ']),
+    ('ӕ', &['Ӕ']),
+    ('Ӗ', &['ӗ']),
+    ('ӗ', &['Ӗ']),
+    ('Ә', &['ә']),
+    ('ә', &['Ә']),
+    ('Ӛ', &['ӛ']),
+    ('ӛ', &['Ӛ']),
+    ('Ӝ', &['ӝ']),
+    ('ӝ', &['Ӝ']),
+    ('Ӟ', &['ӟ']),
+    ('ӟ', &['Ӟ']),
+    ('Ӡ', &['ӡ']),
+    ('ӡ', &['Ӡ']),
+    ('Ӣ', &['ӣ']),
+    ('ӣ', &['Ӣ']),
+    ('Ӥ', &['ӥ']),
+    ('ӥ', &['Ӥ']),
+    ('Ӧ', &['ӧ']),
+    ('ӧ', &['Ӧ']),
+    ('Ө', &['ө']),
+    ('ө', &['Ө']),
+    ('Ӫ', &['ӫ']),
+    ('ӫ', &['Ӫ']),
+    ('Ӭ', &['ӭ']),
+    ('ӭ', &['Ӭ']),
+    ('Ӯ', &['ӯ']),
+    ('ӯ', &['Ӯ']),
+    ('Ӱ', &['ӱ']),
+    ('ӱ', &['Ӱ']),
+    ('Ӳ', &['ӳ']),
+    ('ӳ', &['Ӳ']),
+    ('Ӵ', &['ӵ']),
+    ('ӵ', &['Ӵ']),
+    ('Ӷ', &['ӷ']),
+    ('ӷ', &['Ӷ']),
+    ('Ӹ', &['ӹ']),
+    ('ӹ', &['Ӹ']),
+    ('Ӻ', &['ӻ']),
+    ('ӻ', &['Ӻ']),
+    ('Ӽ', &['ӽ']),
+    ('ӽ', &['Ӽ']),
+    ('Ӿ', &['ӿ']),
+    ('ӿ', &['Ӿ']),
+    ('Ԁ', &['ԁ']),
+    ('ԁ', &['Ԁ']),
+    ('Ԃ', &['ԃ']),
+    ('ԃ', &['Ԃ']),
+    ('Ԅ', &['ԅ']),
+    ('ԅ', &['Ԅ']),
+    ('Ԇ', &['ԇ']),
+    ('ԇ', &['Ԇ']),
+    ('Ԉ', &['ԉ']),
+    ('ԉ', &['Ԉ']),
+    ('Ԋ', &['ԋ']),
+    ('ԋ', &['Ԋ']),
+    ('Ԍ', &['ԍ']),
+    ('ԍ', &['Ԍ']),
+    ('Ԏ', &['ԏ']),
+    ('ԏ', &['Ԏ']),
+    ('Ԑ', &['ԑ']),
+    ('ԑ', &['Ԑ']),
+    ('Ԓ', &['ԓ']),
+    ('ԓ', &['Ԓ']),
+    ('Ԕ', &['ԕ']),
+    ('ԕ', &['Ԕ']),
+    ('Ԗ', &['ԗ']),
+    ('ԗ', &['Ԗ']),
+    ('Ԙ', &['ԙ']),
+    ('ԙ', &['Ԙ']),
+    ('Ԛ', &['ԛ']),
+    ('ԛ', &['Ԛ']),
+    ('Ԝ', &['ԝ']),
+    ('ԝ', &['Ԝ']),
+    ('Ԟ', &['ԟ']),
+    ('ԟ', &['Ԟ']),
+    ('Ԡ', &['ԡ']),
+    ('ԡ', &['Ԡ']),
+    ('Ԣ', &['ԣ']),
+    ('ԣ', &['Ԣ']),
+    ('Ԥ', &['ԥ']),
+    ('ԥ', &['Ԥ']),
+    ('Ԧ', &['ԧ']),
+    ('ԧ', &['Ԧ']),
+    ('Ԩ', &['ԩ']),
+    ('ԩ', &['Ԩ']),
+    ('Ԫ', &['ԫ']),
+    ('ԫ', &['Ԫ']),
+    ('Ԭ', &['ԭ']),
+    ('ԭ', &['Ԭ']),
+    ('Ԯ', &['ԯ']),
+    ('ԯ', &['Ԯ']),
+    ('Ա', &['ա']),
+    ('Բ', &['բ']),
+    ('Գ', &['գ']),
+    ('Դ', &['դ']),
+    ('Ե', &['ե']),
+    ('Զ', &['զ']),
+    ('Է', &['է']),
+    ('Ը', &['ը']),
+    ('Թ', &['թ']),
+    ('Ժ', &['ժ']),
+    ('Ի', &['ի']),
+    ('Լ', &['լ']),
+    ('Խ', &['խ']),
+    ('Ծ', &['ծ']),
+    ('Կ', &['կ']),
+    ('Հ', &['հ']),
+    ('Ձ', &['ձ']),
+    ('Ղ', &['ղ']),
+    ('Ճ', &['ճ']),
+    ('Մ', &['մ']),
+    ('Յ', &['յ']),
+    ('Ն', &['ն']),
+    ('Շ', &['շ']),
+    ('Ո', &['ո']),
+    ('Չ', &['չ']),
+    ('Պ', &['պ']),
+    ('Ջ', &['ջ']),
+    ('Ռ', &['ռ']),
+    ('Ս', &['ս']),
+    ('Վ', &['վ']),
+    ('Տ', &['տ']),
+    ('Ր', &['ր']),
+    ('Ց', &['ց']),
+    ('Ւ', &['ւ']),
+    ('Փ', &['փ']),
+    ('Ք', &['ք']),
+    ('Օ', &['օ']),
+    ('Ֆ', &['ֆ']),
+    ('ա', &['Ա']),
+    ('բ', &['Բ']),
+    ('գ', &['Գ']),
+    ('դ', &['Դ']),
+    ('ե', &['Ե']),
+    ('զ', &['Զ']),
+    ('է', &['Է']),
+    ('ը', &['Ը']),
+    ('թ', &['Թ']),
+    ('ժ', &['Ժ']),
+    ('ի', &['Ի']),
+    ('լ', &['Լ']),
+    ('խ', &['Խ']),
+    ('ծ', &['Ծ']),
+    ('կ', &['Կ']),
+    ('հ', &['Հ']),
+    ('ձ', &['Ձ']),
+    ('ղ', &['Ղ']),
+    ('ճ', &['Ճ']),
+    ('մ', &['Մ']),
+    ('յ', &['Յ']),
+    ('ն', &['Ն']),
+    ('շ', &['Շ']),
+    ('ո', &['Ո']),
+    ('չ', &['Չ']),
+    ('պ', &['Պ']),
+    ('ջ', &['Ջ']),
+    ('ռ', &['Ռ']),
+    ('ս', &['Ս']),
+    ('վ', &['Վ']),
+    ('տ', &['Տ']),
+    ('ր', &['Ր']),
+    ('ց', &['Ց']),
+    ('ւ', &['Ւ']),
+    ('փ', &['Փ']),
+    ('ք', &['Ք']),
+    ('օ', &['Օ']),
+    ('ֆ', &['Ֆ']),
+    ('Ⴀ', &['ⴀ']),
+    ('Ⴁ', &['ⴁ']),
+    ('Ⴂ', &['ⴂ']),
+    ('Ⴃ', &['ⴃ']),
+    ('Ⴄ', &['ⴄ']),
+    ('Ⴅ', &['ⴅ']),
+    ('Ⴆ', &['ⴆ']),
+    ('Ⴇ', &['ⴇ']),
+    ('Ⴈ', &['ⴈ']),
+    ('Ⴉ', &['ⴉ']),
+    ('Ⴊ', &['ⴊ']),
+    ('Ⴋ', &['ⴋ']),
+    ('Ⴌ', &['ⴌ']),
+    ('Ⴍ', &['ⴍ']),
+    ('Ⴎ', &['ⴎ']),
+    ('Ⴏ', &['ⴏ']),
+    ('Ⴐ', &['ⴐ']),
+    ('Ⴑ', &['ⴑ']),
+    ('Ⴒ', &['ⴒ']),
+    ('Ⴓ', &['ⴓ']),
+    ('Ⴔ', &['ⴔ']),
+    ('Ⴕ', &['ⴕ']),
+    ('Ⴖ', &['ⴖ']),
+    ('Ⴗ', &['ⴗ']),
+    ('Ⴘ', &['ⴘ']),
+    ('Ⴙ', &['ⴙ']),
+    ('Ⴚ', &['ⴚ']),
+    ('Ⴛ', &['ⴛ']),
+    ('Ⴜ', &['ⴜ']),
+    ('Ⴝ', &['ⴝ']),
+    ('Ⴞ', &['ⴞ']),
+    ('Ⴟ', &['ⴟ']),
+    ('Ⴠ', &['ⴠ']),
+    ('Ⴡ', &['ⴡ']),
+    ('Ⴢ', &['ⴢ']),
+    ('Ⴣ', &['ⴣ']),
+    ('Ⴤ', &['ⴤ']),
+    ('Ⴥ', &['ⴥ']),
+    ('Ⴧ', &['ⴧ']),
+    ('Ⴭ', &['ⴭ']),
+    ('ა', &['Ა']),
+    ('ბ', &['Ბ']),
+    ('გ', &['Გ']),
+    ('დ', &['Დ']),
+    ('ე', &['Ე']),
+    ('ვ', &['Ვ']),
+    ('ზ', &['Ზ']),
+    ('თ', &['Თ']),
+    ('ი', &['Ი']),
+    ('კ', &['Კ']),
+    ('ლ', &['Ლ']),
+    ('მ', &['Მ']),
+    ('ნ', &['Ნ']),
+    ('ო', &['Ო']),
+    ('პ', &['Პ']),
+    ('ჟ', &['Ჟ']),
+    ('რ', &['Რ']),
+    ('ს', &['Ს']),
+    ('ტ', &['Ტ']),
+    ('უ', &['Უ']),
+    ('ფ', &['Ფ']),
+    ('ქ', &['Ქ']),
+    ('ღ', &['Ღ']),
+    ('ყ', &['Ყ']),
+    ('შ', &['Შ']),
+    ('ჩ', &['Ჩ']),
+    ('ც', &['Ც']),
+    ('ძ', &['Ძ']),
+    ('წ', &['Წ']),
+    ('ჭ', &['Ჭ']),
+    ('ხ', &['Ხ']),
+    ('ჯ', &['Ჯ']),
+    ('ჰ', &['Ჰ']),
+    ('ჱ', &['Ჱ']),
+    ('ჲ', &['Ჲ']),
+    ('ჳ', &['Ჳ']),
+    ('ჴ', &['Ჴ']),
+    ('ჵ', &['Ჵ']),
+    ('ჶ', &['Ჶ']),
+    ('ჷ', &['Ჷ']),
+    ('ჸ', &['Ჸ']),
+    ('ჹ', &['Ჹ']),
+    ('ჺ', &['Ჺ']),
+    ('ჽ', &['Ჽ']),
+    ('ჾ', &['Ჾ']),
+    ('ჿ', &['Ჿ']),
+    ('Ꭰ', &['ꭰ']),
+    ('Ꭱ', &['ꭱ']),
+    ('Ꭲ', &['ꭲ']),
+    ('Ꭳ', &['ꭳ']),
+    ('Ꭴ', &['ꭴ']),
+    ('Ꭵ', &['ꭵ']),
+    ('Ꭶ', &['ꭶ']),
+    ('Ꭷ', &['ꭷ']),
+    ('Ꭸ', &['ꭸ']),
+    ('Ꭹ', &['ꭹ']),
+    ('Ꭺ', &['ꭺ']),
+    ('Ꭻ', &['ꭻ']),
+    ('Ꭼ', &['ꭼ']),
+    ('Ꭽ', &['ꭽ']),
+    ('Ꭾ', &['ꭾ']),
+    ('Ꭿ', &['ꭿ']),
+    ('Ꮀ', &['ꮀ']),
+    ('Ꮁ', &['ꮁ']),
+    ('Ꮂ', &['ꮂ']),
+    ('Ꮃ', &['ꮃ']),
+    ('Ꮄ', &['ꮄ']),
+    ('Ꮅ', &['ꮅ']),
+    ('Ꮆ', &['ꮆ']),
+    ('Ꮇ', &['ꮇ']),
+    ('Ꮈ', &['ꮈ']),
+    ('Ꮉ', &['ꮉ']),
+    ('Ꮊ', &['ꮊ']),
+    ('Ꮋ', &['ꮋ']),
+    ('Ꮌ', &['ꮌ']),
+    ('Ꮍ', &['ꮍ']),
+    ('Ꮎ', &['ꮎ']),
+    ('Ꮏ', &['ꮏ']),
+    ('Ꮐ', &['ꮐ']),
+    ('Ꮑ', &['ꮑ']),
+    ('Ꮒ', &['ꮒ']),
+    ('Ꮓ', &['ꮓ']),
+    ('Ꮔ', &['ꮔ']),
+    ('Ꮕ', &['ꮕ']),
+    ('Ꮖ', &['ꮖ']),
+    ('Ꮗ', &['ꮗ']),
+    ('Ꮘ', &['ꮘ']),
+    ('Ꮙ', &['ꮙ']),
+    ('Ꮚ', &['ꮚ']),
+    ('Ꮛ', &['ꮛ']),
+    ('Ꮜ', &['ꮜ']),
+    ('Ꮝ', &['ꮝ']),
+    ('Ꮞ', &['ꮞ']),
+    ('Ꮟ', &['ꮟ']),
+    ('Ꮠ', &['ꮠ']),
+    ('Ꮡ', &['ꮡ']),
+    ('Ꮢ', &['ꮢ']),
+    ('Ꮣ', &['ꮣ']),
+    ('Ꮤ', &['ꮤ']),
+    ('Ꮥ', &['ꮥ']),
+    ('Ꮦ', &['ꮦ']),
+    ('Ꮧ', &['ꮧ']),
+    ('Ꮨ', &['ꮨ']),
+    ('Ꮩ', &['ꮩ']),
+    ('Ꮪ', &['ꮪ']),
+    ('Ꮫ', &['ꮫ']),
+    ('Ꮬ', &['ꮬ']),
+    ('Ꮭ', &['ꮭ']),
+    ('Ꮮ', &['ꮮ']),
+    ('Ꮯ', &['ꮯ']),
+    ('Ꮰ', &['ꮰ']),
+    ('Ꮱ', &['ꮱ']),
+    ('Ꮲ', &['ꮲ']),
+    ('Ꮳ', &['ꮳ']),
+    ('Ꮴ', &['ꮴ']),
+    ('Ꮵ', &['ꮵ']),
+    ('Ꮶ', &['ꮶ']),
+    ('Ꮷ', &['ꮷ']),
+    ('Ꮸ', &['ꮸ']),
+    ('Ꮹ', &['ꮹ']),
+    ('Ꮺ', &['ꮺ']),
+    ('Ꮻ', &['ꮻ']),
+    ('Ꮼ', &['ꮼ']),
+    ('Ꮽ', &['ꮽ']),
+    ('Ꮾ', &['ꮾ']),
+    ('Ꮿ', &['ꮿ']),
+    ('Ᏸ', &['ᏸ']),
+    ('Ᏹ', &['ᏹ']),
+    ('Ᏺ', &['ᏺ']),
+    ('Ᏻ', &['ᏻ']),
+    ('Ᏼ', &['ᏼ']),
+    ('Ᏽ', &['ᏽ']),
+    ('ᏸ', &['Ᏸ']),
+    ('ᏹ', &['Ᏹ']),
+    ('ᏺ', &['Ᏺ']),
+    ('ᏻ', &['Ᏻ']),
+    ('ᏼ', &['Ᏼ']),
+    ('ᏽ', &['Ᏽ']),
+    ('ᲀ', &['В', 'в']),
+    ('ᲁ', &['Д', 'д']),
+    ('ᲂ', &['О', 'о']),
+    ('ᲃ', &['С', 'с']),
+    ('ᲄ', &['Т', 'т', 'ᲅ']),
+    ('ᲅ', &['Т', 'т', 'ᲄ']),
+    ('ᲆ', &['Ъ', 'ъ']),
+    ('ᲇ', &['Ѣ', 'ѣ']),
+    ('ᲈ', &['Ꙋ', 'ꙋ']),
+    ('Ა', &['ა']),
+    ('Ბ', &['ბ']),
+    ('Გ', &['გ']),
+    ('Დ', &['დ']),
+    ('Ე', &['ე']),
+    ('Ვ', &['ვ']),
+    ('Ზ', &['ზ']),
+    ('Თ', &['თ']),
+    ('Ი', &['ი']),
+    ('Კ', &['კ']),
+    ('Ლ', &['ლ']),
+    ('Მ', &['მ']),
+    ('Ნ', &['ნ']),
+    ('Ო', &['ო']),
+    ('Პ', &['პ']),
+    ('Ჟ', &['ჟ']),
+    ('Რ', &['რ']),
+    ('Ს', &['ს']),
+    ('Ტ', &['ტ']),
+    ('Უ', &['უ']),
+    ('Ფ', &['ფ']),
+    ('Ქ', &['ქ']),
+    ('Ღ', &['ღ']),
+    ('Ყ', &['ყ']),
+    ('Შ', &['შ']),
+    ('Ჩ', &['ჩ']),
+    ('Ც', &['ც']),
+    ('Ძ', &['ძ']),
+    ('Წ', &['წ']),
+    ('Ჭ', &['ჭ']),
+    ('Ხ', &['ხ']),
+    ('Ჯ', &['ჯ']),
+    ('Ჰ', &['ჰ']),
+    ('Ჱ', &['ჱ']),
+    ('Ჲ', &['ჲ']),
+    ('Ჳ', &['ჳ']),
+    ('Ჴ', &['ჴ']),
+    ('Ჵ', &['ჵ']),
+    ('Ჶ', &['ჶ']),
+    ('Ჷ', &['ჷ']),
+    ('Ჸ', &['ჸ']),
+    ('Ჹ', &['ჹ']),
+    ('Ჺ', &['ჺ']),
+    ('Ჽ', &['ჽ']),
+    ('Ჾ', &['ჾ']),
+    ('Ჿ', &['ჿ']),
+    ('ᵹ', &['Ᵹ']),
+    ('ᵽ', &['Ᵽ']),
+    ('ᶎ', &['Ᶎ']),
+    ('Ḁ', &['ḁ']),
+    ('ḁ', &['Ḁ']),
+    ('Ḃ', &['ḃ']),
+    ('ḃ', &['Ḃ']),
+    ('Ḅ', &['ḅ']),
+    ('ḅ', &['Ḅ']),
+    ('Ḇ', &['ḇ']),
+    ('ḇ', &['Ḇ']),
+    ('Ḉ', &['ḉ']),
+    ('ḉ', &['Ḉ']),
+    ('Ḋ', &['ḋ']),
+    ('ḋ', &['Ḋ']),
+    ('Ḍ', &['ḍ']),
+    ('ḍ', &['Ḍ']),
+    ('Ḏ', &['ḏ']),
+    ('ḏ', &['Ḏ']),
+    ('Ḑ', &['ḑ']),
+    ('ḑ', &['Ḑ']),
+    ('Ḓ', &['ḓ']),
+    ('ḓ', &['Ḓ']),
+    ('Ḕ', &['ḕ']),
+    ('ḕ', &['Ḕ']),
+    ('Ḗ', &['ḗ']),
+    ('ḗ', &['Ḗ']),
+    ('Ḙ', &['ḙ']),
+    ('ḙ', &['Ḙ']),
+    ('Ḛ', &['ḛ']),
+    ('ḛ', &['Ḛ']),
+    ('Ḝ', &['ḝ']),
+    ('ḝ', &['Ḝ']),
+    ('Ḟ', &['ḟ']),
+    ('ḟ', &['Ḟ']),
+    ('Ḡ', &['ḡ']),
+    ('ḡ', &['Ḡ']),
+    ('Ḣ', &['ḣ']),
+    ('ḣ', &['Ḣ']),
+    ('Ḥ', &['ḥ']),
+    ('ḥ', &['Ḥ']),
+    ('Ḧ', &['ḧ']),
+    ('ḧ', &['Ḧ']),
+    ('Ḩ', &['ḩ']),
+    ('ḩ', &['Ḩ']),
+    ('Ḫ', &['ḫ']),
+    ('ḫ', &['Ḫ']),
+    ('Ḭ', &['ḭ']),
+    ('ḭ', &['Ḭ']),
+    ('Ḯ', &['ḯ']),
+    ('ḯ', &['Ḯ']),
+    ('Ḱ', &['ḱ']),
+    ('ḱ', &['Ḱ']),
+    ('Ḳ', &['ḳ']),
+    ('ḳ', &['Ḳ']),
+    ('Ḵ', &['ḵ']),
+    ('ḵ', &['Ḵ']),
+    ('Ḷ', &['ḷ']),
+    ('ḷ', &['Ḷ']),
+    ('Ḹ', &['ḹ']),
+    ('ḹ', &['Ḹ']),
+    ('Ḻ', &['ḻ']),
+    ('ḻ', &['Ḻ']),
+    ('Ḽ', &['ḽ']),
+    ('ḽ', &['Ḽ']),
+    ('Ḿ', &['ḿ']),
+    ('ḿ', &['Ḿ']),
+    ('Ṁ', &['ṁ']),
+    ('ṁ', &['Ṁ']),
+    ('Ṃ', &['ṃ']),
+    ('ṃ', &['Ṃ']),
+    ('Ṅ', &['ṅ']),
+    ('ṅ', &['Ṅ']),
+    ('Ṇ', &['ṇ']),
+    ('ṇ', &['Ṇ']),
+    ('Ṉ', &['ṉ']),
+    ('ṉ', &['Ṉ']),
+    ('Ṋ', &['ṋ']),
+    ('ṋ', &['Ṋ']),
+    ('Ṍ', &['ṍ']),
+    ('ṍ', &['Ṍ']),
+    ('Ṏ', &['ṏ']),
+    ('ṏ', &['Ṏ']),
+    ('Ṑ', &['ṑ']),
+    ('ṑ', &['Ṑ']),
+    ('Ṓ', &['ṓ']),
+    ('ṓ', &['Ṓ']),
+    ('Ṕ', &['ṕ']),
+    ('ṕ', &['Ṕ']),
+    ('Ṗ', &['ṗ']),
+    ('ṗ', &['Ṗ']),
+    ('Ṙ', &['ṙ']),
+    ('ṙ', &['Ṙ']),
+    ('Ṛ', &['ṛ']),
+    ('ṛ', &['Ṛ']),
+    ('Ṝ', &['ṝ']),
+    ('ṝ', &['Ṝ']),
+    ('Ṟ', &['ṟ']),
+    ('ṟ', &['Ṟ']),
+    ('Ṡ', &['ṡ', 'ẛ']),
+    ('ṡ', &['Ṡ', 'ẛ']),
+    ('Ṣ', &['ṣ']),
+    ('ṣ', &['Ṣ']),
+    ('Ṥ', &['ṥ']),
+    ('ṥ', &['Ṥ']),
+    ('Ṧ', &['ṧ']),
+    ('ṧ', &['Ṧ']),
+    ('Ṩ', &['ṩ']),
+    ('ṩ', &['Ṩ']),
+    ('Ṫ', &['ṫ']),
+    ('ṫ', &['Ṫ']),
+    ('Ṭ', &['ṭ']),
+    ('ṭ', &['Ṭ']),
+    ('Ṯ', &['ṯ']),
+    ('ṯ', &['Ṯ']),
+    ('Ṱ', &['ṱ']),
+    ('ṱ', &['Ṱ']),
+    ('Ṳ', &['ṳ']),
+    ('ṳ', &['Ṳ']),
+    ('Ṵ', &['ṵ']),
+    ('ṵ', &['Ṵ']),
+    ('Ṷ', &['ṷ']),
+    ('ṷ', &['Ṷ']),
+    ('Ṹ', &['ṹ']),
+    ('ṹ', &['Ṹ']),
+    ('Ṻ', &['ṻ']),
+    ('ṻ', &['Ṻ']),
+    ('Ṽ', &['ṽ']),
+    ('ṽ', &['Ṽ']),
+    ('Ṿ', &['ṿ']),
+    ('ṿ', &['Ṿ']),
+    ('Ẁ', &['ẁ']),
+    ('ẁ', &['Ẁ']),
+    ('Ẃ', &['ẃ']),
+    ('ẃ', &['Ẃ']),
+    ('Ẅ', &['ẅ']),
+    ('ẅ', &['Ẅ']),
+    ('Ẇ', &['ẇ']),
+    ('ẇ', &['Ẇ']),
+    ('Ẉ', &['ẉ']),
+    ('ẉ', &['Ẉ']),
+    ('Ẋ', &['ẋ']),
+    ('ẋ', &['Ẋ']),
+    ('Ẍ', &['ẍ']),
+    ('ẍ', &['Ẍ']),
+    ('Ẏ', &['ẏ']),
+    ('ẏ', &['Ẏ']),
+    ('Ẑ', &['ẑ']),
+    ('ẑ', &['Ẑ']),
+    ('Ẓ', &['ẓ']),
+    ('ẓ', &['Ẓ']),
+    ('Ẕ', &['ẕ']),
+    ('ẕ', &['Ẕ']),
+    ('ẛ', &['Ṡ', 'ṡ']),
+    ('ẞ', &['ß']),
+    ('Ạ', &['ạ']),
+    ('ạ', &['Ạ']),
+    ('Ả', &['ả']),
+    ('ả', &['Ả']),
+    ('Ấ', &['ấ']),
+    ('ấ', &['Ấ']),
+    ('Ầ', &['ầ']),
+    ('ầ', &['Ầ']),
+    ('Ẩ', &['ẩ']),
+    ('ẩ', &['Ẩ']),
+    ('Ẫ', &['ẫ']),
+    ('ẫ', &['Ẫ']),
+    ('Ậ', &['ậ']),
+    ('ậ', &['Ậ']),
+    ('Ắ', &['ắ']),
+    ('ắ', &['Ắ']),
+    ('Ằ', &['ằ']),
+    ('ằ', &['Ằ']),
+    ('Ẳ', &['ẳ']),
+    ('ẳ', &['Ẳ']),
+    ('Ẵ', &['ẵ']),
+    ('ẵ', &['Ẵ']),
+    ('Ặ', &['ặ']),
+    ('ặ', &['Ặ']),
+    ('Ẹ', &['ẹ']),
+    ('ẹ', &['Ẹ']),
+    ('Ẻ', &['ẻ']),
+    ('ẻ', &['Ẻ']),
+    ('Ẽ', &['ẽ']),
+    ('ẽ', &['Ẽ']),
+    ('Ế', &['ế']),
+    ('ế', &['Ế']),
+    ('Ề', &['ề']),
+    ('ề', &['Ề']),
+    ('Ể', &['ể']),
+    ('ể', &['Ể']),
+    ('Ễ', &['ễ']),
+    ('ễ', &['Ễ']),
+    ('Ệ', &['ệ']),
+    ('ệ', &['Ệ']),
+    ('Ỉ', &['ỉ']),
+    ('ỉ', &['Ỉ']),
+    ('Ị', &['ị']),
+    ('ị', &['Ị']),
+    ('Ọ', &['ọ']),
+    ('ọ', &['Ọ']),
+    ('Ỏ', &['ỏ']),
+    ('ỏ', &['Ỏ']),
+    ('Ố', &['ố']),
+    ('ố', &['Ố']),
+    ('Ồ', &['ồ']),
+    ('ồ', &['Ồ']),
+    ('Ổ', &['ổ']),
+    ('ổ', &['Ổ']),
+    ('Ỗ', &['ỗ']),
+    ('ỗ', &['Ỗ']),
+    ('Ộ', &['ộ']),
+    ('ộ', &['Ộ']),
+    ('Ớ', &['ớ']),
+    ('ớ', &['Ớ']),
+    ('Ờ', &['ờ']),
+    ('ờ', &['Ờ']),
+    ('Ở', &['ở']),
+    ('ở', &['Ở']),
+    ('Ỡ', &['ỡ']),
+    ('ỡ', &['Ỡ']),
+    ('Ợ', &['ợ']),
+    ('ợ', &['Ợ']),
+    ('Ụ', &['ụ']),
+    ('ụ', &['Ụ']),
+    ('Ủ', &['ủ']),
+    ('ủ', &['Ủ']),
+    ('Ứ', &['ứ']),
+    ('ứ', &['Ứ']),
+    ('Ừ', &['ừ']),
+    ('ừ', &['Ừ']),
+    ('Ử', &['ử']),
+    ('ử', &['Ử']),
+    ('Ữ', &['ữ']),
+    ('ữ', &['Ữ']),
+    ('Ự', &['ự']),
+    ('ự', &['Ự']),
+    ('Ỳ', &['ỳ']),
+    ('ỳ', &['Ỳ']),
+    ('Ỵ', &['ỵ']),
+    ('ỵ', &['Ỵ']),
+    ('Ỷ', &['ỷ']),
+    ('ỷ', &['Ỷ']),
+    ('Ỹ', &['ỹ']),
+    ('ỹ', &['Ỹ']),
+    ('Ỻ', &['ỻ']),
+    ('ỻ', &['Ỻ']),
+    ('Ỽ', &['ỽ']),
+    ('ỽ', &['Ỽ']),
+    ('Ỿ', &['ỿ']),
+    ('ỿ', &['Ỿ']),
+    ('ἀ', &['Ἀ']),
+    ('ἁ', &['Ἁ']),
+    ('ἂ', &['Ἂ']),
+    ('ἃ', &['Ἃ']),
+    ('ἄ', &['Ἄ']),
+    ('ἅ', &['Ἅ']),
+    ('ἆ', &['Ἆ']),
+    ('ἇ', &['Ἇ']),
+    ('Ἀ', &['ἀ']),
+    ('Ἁ', &['ἁ']),
+    ('Ἂ', &['ἂ']),
+    ('Ἃ', &['ἃ']),
+    ('Ἄ', &['ἄ']),
+    ('Ἅ', &['ἅ']),
+    ('Ἆ', &['ἆ']),
+    ('Ἇ', &['ἇ']),
+    ('ἐ', &['Ἐ']),
+    ('ἑ', &['Ἑ']),
+    ('ἒ', &['Ἒ']),
+    ('ἓ', &['Ἓ']),
+    ('ἔ', &['Ἔ']),
+    ('ἕ', &['Ἕ']),
+    ('Ἐ', &['ἐ']),
+    ('Ἑ', &['ἑ']),
+    ('Ἒ', &['ἒ']),
+    ('Ἓ', &['ἓ']),
+    ('Ἔ', &['ἔ']),
+    ('Ἕ', &['ἕ']),
+    ('ἠ', &['Ἠ']),
+    ('ἡ', &['Ἡ']),
+    ('ἢ', &['Ἢ']),
+    ('ἣ', &['Ἣ']),
+    ('ἤ', &['Ἤ']),
+    ('ἥ', &['Ἥ']),
+    ('ἦ', &['Ἦ']),
+    ('ἧ', &['Ἧ']),
+    ('Ἠ', &['ἠ']),
+    ('Ἡ', &['ἡ']),
+    ('Ἢ', &['ἢ']),
+    ('Ἣ', &['ἣ']),
+    ('Ἤ', &['ἤ']),
+    ('Ἥ', &['ἥ']),
+    ('Ἦ', &['ἦ']),
+    ('Ἧ', &['ἧ']),
+    ('ἰ', &['Ἰ']),
+    ('ἱ', &['Ἱ']),
+    ('ἲ', &['Ἲ']),
+    ('ἳ', &['Ἳ']),
+    ('ἴ', &['Ἴ']),
+    ('ἵ', &['Ἵ']),
+    ('ἶ', &['Ἶ']),
+    ('ἷ', &['Ἷ']),
+    ('Ἰ', &['ἰ']),
+    ('Ἱ', &['ἱ']),
+    ('Ἲ', &['ἲ']),
+    ('Ἳ', &['ἳ']),
+    ('Ἴ', &['ἴ']),
+    ('Ἵ', &['ἵ']),
+    ('Ἶ', &['ἶ']),
+    ('Ἷ', &['ἷ']),
+    ('ὀ', &['Ὀ']),
+    ('ὁ', &['Ὁ']),
+    ('ὂ', &['Ὂ']),
+    ('ὃ', &['Ὃ']),
+    ('ὄ', &['Ὄ']),
+    ('ὅ', &['Ὅ']),
+    ('Ὀ', &['ὀ']),
+    ('Ὁ', &['ὁ']),
+    ('Ὂ', &['ὂ']),
+    ('Ὃ', &['ὃ']),
+    ('Ὄ', &['ὄ']),
+    ('Ὅ', &['ὅ']),
+    ('ὑ', &['Ὑ']),
+    ('ὓ', &['Ὓ']),
+    ('ὕ', &['Ὕ']),
+    ('ὗ', &['Ὗ']),
+    ('Ὑ', &['ὑ']),
+    ('Ὓ', &['ὓ']),
+    ('Ὕ', &['ὕ']),
+    ('Ὗ', &['ὗ']),
+    ('ὠ', &['Ὠ']),
+    ('ὡ', &['Ὡ']),
+    ('ὢ', &['Ὢ']),
+    ('ὣ', &['Ὣ']),
+    ('ὤ', &['Ὤ']),
+    ('ὥ', &['Ὥ']),
+    ('ὦ', &['Ὦ']),
+    ('ὧ', &['Ὧ']),
+    ('Ὠ', &['ὠ']),
+    ('Ὡ', &['ὡ']),
+    ('Ὢ', &['ὢ']),
+    ('Ὣ', &['ὣ']),
+    ('Ὤ', &['ὤ']),
+    ('Ὥ', &['ὥ']),
+    ('Ὦ', &['ὦ']),
+    ('Ὧ', &['ὧ']),
+    ('ὰ', &['Ὰ']),
+    ('ά', &['Ά']),
+    ('ὲ', &['Ὲ']),
+    ('έ', &['Έ']),
+    ('ὴ', &['Ὴ']),
+    ('ή', &['Ή']),
+    ('ὶ', &['Ὶ']),
+    ('ί', &['Ί']),
+    ('ὸ', &['Ὸ']),
+    ('ό', &['Ό']),
+    ('ὺ', &['Ὺ']),
+    ('ύ', &['Ύ']),
+    ('ὼ', &['Ὼ']),
+    ('ώ', &['Ώ']),
+    ('ᾀ', &['ᾈ']),
+    ('ᾁ', &['ᾉ']),
+    ('ᾂ', &['ᾊ']),
+    ('ᾃ', &['ᾋ']),
+    ('ᾄ', &['ᾌ']),
+    ('ᾅ', &['ᾍ']),
+    ('ᾆ', &['ᾎ']),
+    ('ᾇ', &['ᾏ']),
+    ('ᾈ', &['ᾀ']),
+    ('ᾉ', &['ᾁ']),
+    ('ᾊ', &['ᾂ']),
+    ('ᾋ', &['ᾃ']),
+    ('ᾌ', &['ᾄ']),
+    ('ᾍ', &['ᾅ']),
+    ('ᾎ', &['ᾆ']),
+    ('ᾏ', &['ᾇ']),
+    ('ᾐ', &['ᾘ']),
+    ('ᾑ', &['ᾙ']),
+    ('ᾒ', &['ᾚ']),
+    ('ᾓ', &['ᾛ']),
+    ('ᾔ', &['ᾜ']),
+    ('ᾕ', &['ᾝ']),
+    ('ᾖ', &['ᾞ']),
+    ('ᾗ', &['ᾟ']),
+    ('ᾘ', &['ᾐ']),
+    ('ᾙ', &['ᾑ']),
+    ('ᾚ', &['ᾒ']),
+    ('ᾛ', &['ᾓ']),
+    ('ᾜ', &['ᾔ']),
+    ('ᾝ', &['ᾕ']),
+    ('ᾞ', &['ᾖ']),
+    ('ᾟ', &['ᾗ']),
+    ('ᾠ', &['ᾨ']),
+    ('ᾡ', &['ᾩ']),
+    ('ᾢ', &['ᾪ']),
+    ('ᾣ', &['ᾫ']),
+    ('ᾤ', &['ᾬ']),
+    ('ᾥ', &['ᾭ']),
+    ('ᾦ', &['ᾮ']),
+    ('ᾧ', &['ᾯ']),
+    ('ᾨ', &['ᾠ']),
+    ('ᾩ', &['ᾡ']),
+    ('ᾪ', &['ᾢ']),
+    ('ᾫ', &['ᾣ']),
+    ('ᾬ', &['ᾤ']),
+    ('ᾭ', &['ᾥ']),
+    ('ᾮ', &['ᾦ']),
+    ('ᾯ', &['ᾧ']),
+    ('ᾰ', &['Ᾰ']),
+    ('ᾱ', &['Ᾱ']),
+    ('ᾳ', &['ᾼ']),
+    ('Ᾰ', &['ᾰ']),
+    ('Ᾱ', &['ᾱ']),
+    ('Ὰ', &['ὰ']),
+    ('Ά', &['ά']),
+    ('ᾼ', &['ᾳ']),
+    ('ι', &['\u{345}', 'Ι', 'ι']),
+    ('ῃ', &['ῌ']),
+    ('Ὲ', &['ὲ']),
+    ('Έ', &['έ']),
+    ('Ὴ', &['ὴ']),
+    ('Ή', &['ή']),
+    ('ῌ', &['ῃ']),
+    ('ῐ', &['Ῐ']),
+    ('ῑ', &['Ῑ']),
+    ('Ῐ', &['ῐ']),
+    ('Ῑ', &['ῑ']),
+    ('Ὶ', &['ὶ']),
+    ('Ί', &['ί']),
+    ('ῠ', &['Ῠ']),
+    ('ῡ', &['Ῡ']),
+    ('ῥ', &['Ῥ']),
+    ('Ῠ', &['ῠ']),
+    ('Ῡ', &['ῡ']),
+    ('Ὺ', &['ὺ']),
+    ('Ύ', &['ύ']),
+    ('Ῥ', &['ῥ']),
+    ('ῳ', &['ῼ']),
+    ('Ὸ', &['ὸ']),
+    ('Ό', &['ό']),
+    ('Ὼ', &['ὼ']),
+    ('Ώ', &['ώ']),
+    ('ῼ', &['ῳ']),
+    ('Ω', &['Ω', 'ω']),
+    ('K', &['K', 'k']),
+    ('Å', &['Å', 'å']),
+    ('Ⅎ', &['ⅎ']),
+    ('ⅎ', &['Ⅎ']),
+    ('Ⅰ', &['ⅰ']),
+    ('Ⅱ', &['ⅱ']),
+    ('Ⅲ', &['ⅲ']),
+    ('Ⅳ', &['ⅳ']),
+    ('Ⅴ', &['ⅴ']),
+    ('Ⅵ', &['ⅵ']),
+    ('Ⅶ', &['ⅶ']),
+    ('Ⅷ', &['ⅷ']),
+    ('Ⅸ', &['ⅸ']),
+    ('Ⅹ', &['ⅹ']),
+    ('Ⅺ', &['ⅺ']),
+    ('Ⅻ', &['ⅻ']),
+    ('Ⅼ', &['ⅼ']),
+    ('Ⅽ', &['ⅽ']),
+    ('Ⅾ', &['ⅾ']),
+    ('Ⅿ', &['ⅿ']),
+    ('ⅰ', &['Ⅰ']),
+    ('ⅱ', &['Ⅱ']),
+    ('ⅲ', &['Ⅲ']),
+    ('ⅳ', &['Ⅳ']),
+    ('ⅴ', &['Ⅴ']),
+    ('ⅵ', &['Ⅵ']),
+    ('ⅶ', &['Ⅶ']),
+    ('ⅷ', &['Ⅷ']),
+    ('ⅸ', &['Ⅸ']),
+    ('ⅹ', &['Ⅹ']),
+    ('ⅺ', &['Ⅺ']),
+    ('ⅻ', &['Ⅻ']),
+    ('ⅼ', &['Ⅼ']),
+    ('ⅽ', &['Ⅽ']),
+    ('ⅾ', &['Ⅾ']),
+    ('ⅿ', &['Ⅿ']),
+    ('Ↄ', &['ↄ']),
+    ('ↄ', &['Ↄ']),
+    ('Ⓐ', &['ⓐ']),
+    ('Ⓑ', &['ⓑ']),
+    ('Ⓒ', &['ⓒ']),
+    ('Ⓓ', &['ⓓ']),
+    ('Ⓔ', &['ⓔ']),
+    ('Ⓕ', &['ⓕ']),
+    ('Ⓖ', &['ⓖ']),
+    ('Ⓗ', &['ⓗ']),
+    ('Ⓘ', &['ⓘ']),
+    ('Ⓙ', &['ⓙ']),
+    ('Ⓚ', &['ⓚ']),
+    ('Ⓛ', &['ⓛ']),
+    ('Ⓜ', &['ⓜ']),
+    ('Ⓝ', &['ⓝ']),
+    ('Ⓞ', &['ⓞ']),
+    ('Ⓟ', &['ⓟ']),
+    ('Ⓠ', &['ⓠ']),
+    ('Ⓡ', &['ⓡ']),
+    ('Ⓢ', &['ⓢ']),
+    ('Ⓣ', &['ⓣ']),
+    ('Ⓤ', &['ⓤ']),
+    ('Ⓥ', &['ⓥ']),
+    ('Ⓦ', &['ⓦ']),
+    ('Ⓧ', &['ⓧ']),
+    ('Ⓨ', &['ⓨ']),
+    ('Ⓩ', &['ⓩ']),
+    ('ⓐ', &['Ⓐ']),
+    ('ⓑ', &['Ⓑ']),
+    ('ⓒ', &['Ⓒ']),
+    ('ⓓ', &['Ⓓ']),
+    ('ⓔ', &['Ⓔ']),
+    ('ⓕ', &['Ⓕ']),
+    ('ⓖ', &['Ⓖ']),
+    ('ⓗ', &['Ⓗ']),
+    ('ⓘ', &['Ⓘ']),
+    ('ⓙ', &['Ⓙ']),
+    ('ⓚ', &['Ⓚ']),
+    ('ⓛ', &['Ⓛ']),
+    ('ⓜ', &['Ⓜ']),
+    ('ⓝ', &['Ⓝ']),
+    ('ⓞ', &['Ⓞ']),
+    ('ⓟ', &['Ⓟ']),
+    ('ⓠ', &['Ⓠ']),
+    ('ⓡ', &['Ⓡ']),
+    ('ⓢ', &['Ⓢ']),
+    ('ⓣ', &['Ⓣ']),
+    ('ⓤ', &['Ⓤ']),
+    ('ⓥ', &['Ⓥ']),
+    ('ⓦ', &['Ⓦ']),
+    ('ⓧ', &['Ⓧ']),
+    ('ⓨ', &['Ⓨ']),
+    ('ⓩ', &['Ⓩ']),
+    ('Ⰰ', &['ⰰ']),
+    ('Ⰱ', &['ⰱ']),
+    ('Ⰲ', &['ⰲ']),
+    ('Ⰳ', &['ⰳ']),
+    ('Ⰴ', &['ⰴ']),
+    ('Ⰵ', &['ⰵ']),
+    ('Ⰶ', &['ⰶ']),
+    ('Ⰷ', &['ⰷ']),
+    ('Ⰸ', &['ⰸ']),
+    ('Ⰹ', &['ⰹ']),
+    ('Ⰺ', &['ⰺ']),
+    ('Ⰻ', &['ⰻ']),
+    ('Ⰼ', &['ⰼ']),
+    ('Ⰽ', &['ⰽ']),
+    ('Ⰾ', &['ⰾ']),
+    ('Ⰿ', &['ⰿ']),
+    ('Ⱀ', &['ⱀ']),
+    ('Ⱁ', &['ⱁ']),
+    ('Ⱂ', &['ⱂ']),
+    ('Ⱃ', &['ⱃ']),
+    ('Ⱄ', &['ⱄ']),
+    ('Ⱅ', &['ⱅ']),
+    ('Ⱆ', &['ⱆ']),
+    ('Ⱇ', &['ⱇ']),
+    ('Ⱈ', &['ⱈ']),
+    ('Ⱉ', &['ⱉ']),
+    ('Ⱊ', &['ⱊ']),
+    ('Ⱋ', &['ⱋ']),
+    ('Ⱌ', &['ⱌ']),
+    ('Ⱍ', &['ⱍ']),
+    ('Ⱎ', &['ⱎ']),
+    ('Ⱏ', &['ⱏ']),
+    ('Ⱐ', &['ⱐ']),
+    ('Ⱑ', &['ⱑ']),
+    ('Ⱒ', &['ⱒ']),
+    ('Ⱓ', &['ⱓ']),
+    ('Ⱔ', &['ⱔ']),
+    ('Ⱕ', &['ⱕ']),
+    ('Ⱖ', &['ⱖ']),
+    ('Ⱗ', &['ⱗ']),
+    ('Ⱘ', &['ⱘ']),
+    ('Ⱙ', &['ⱙ']),
+    ('Ⱚ', &['ⱚ']),
+    ('Ⱛ', &['ⱛ']),
+    ('Ⱜ', &['ⱜ']),
+    ('Ⱝ', &['ⱝ']),
+    ('Ⱞ', &['ⱞ']),
+    ('ⰰ', &['Ⰰ']),
+    ('ⰱ', &['Ⰱ']),
+    ('ⰲ', &['Ⰲ']),
+    ('ⰳ', &['Ⰳ']),
+    ('ⰴ', &['Ⰴ']),
+    ('ⰵ', &['Ⰵ']),
+    ('ⰶ', &['Ⰶ']),
+    ('ⰷ', &['Ⰷ']),
+    ('ⰸ', &['Ⰸ']),
+    ('ⰹ', &['Ⰹ']),
+    ('ⰺ', &['Ⰺ']),
+    ('ⰻ', &['Ⰻ']),
+    ('ⰼ', &['Ⰼ']),
+    ('ⰽ', &['Ⰽ']),
+    ('ⰾ', &['Ⰾ']),
+    ('ⰿ', &['Ⰿ']),
+    ('ⱀ', &['Ⱀ']),
+    ('ⱁ', &['Ⱁ']),
+    ('ⱂ', &['Ⱂ']),
+    ('ⱃ', &['Ⱃ']),
+    ('ⱄ', &['Ⱄ']),
+    ('ⱅ', &['Ⱅ']),
+    ('ⱆ', &['Ⱆ']),
+    ('ⱇ', &['Ⱇ']),
+    ('ⱈ', &['Ⱈ']),
+    ('ⱉ', &['Ⱉ']),
+    ('ⱊ', &['Ⱊ']),
+    ('ⱋ', &['Ⱋ']),
+    ('ⱌ', &['Ⱌ']),
+    ('ⱍ', &['Ⱍ']),
+    ('ⱎ', &['Ⱎ']),
+    ('ⱏ', &['Ⱏ']),
+    ('ⱐ', &['Ⱐ']),
+    ('ⱑ', &['Ⱑ']),
+    ('ⱒ', &['Ⱒ']),
+    ('ⱓ', &['Ⱓ']),
+    ('ⱔ', &['Ⱔ']),
+    ('ⱕ', &['Ⱕ']),
+    ('ⱖ', &['Ⱖ']),
+    ('ⱗ', &['Ⱗ']),
+    ('ⱘ', &['Ⱘ']),
+    ('ⱙ', &['Ⱙ']),
+    ('ⱚ', &['Ⱚ']),
+    ('ⱛ', &['Ⱛ']),
+    ('ⱜ', &['Ⱜ']),
+    ('ⱝ', &['Ⱝ']),
+    ('ⱞ', &['Ⱞ']),
+    ('Ⱡ', &['ⱡ']),
+    ('ⱡ', &['Ⱡ']),
+    ('Ɫ', &['ɫ']),
+    ('Ᵽ', &['ᵽ']),
+    ('Ɽ', &['ɽ']),
+    ('ⱥ', &['Ⱥ']),
+    ('ⱦ', &['Ⱦ']),
+    ('Ⱨ', &['ⱨ']),
+    ('ⱨ', &['Ⱨ']),
+    ('Ⱪ', &['ⱪ']),
+    ('ⱪ', &['Ⱪ']),
+    ('Ⱬ', &['ⱬ']),
+    ('ⱬ', &['Ⱬ']),
+    ('Ɑ', &['ɑ']),
+    ('Ɱ', &['ɱ']),
+    ('Ɐ', &['ɐ']),
+    ('Ɒ', &['ɒ']),
+    ('Ⱳ', &['ⱳ']),
+    ('ⱳ', &['Ⱳ']),
+    ('Ⱶ', &['ⱶ']),
+    ('ⱶ', &['Ⱶ']),
+    ('Ȿ', &['ȿ']),
+    ('Ɀ', &['ɀ']),
+    ('Ⲁ', &['ⲁ']),
+    ('ⲁ', &['Ⲁ']),
+    ('Ⲃ', &['ⲃ']),
+    ('ⲃ', &['Ⲃ']),
+    ('Ⲅ', &['ⲅ']),
+    ('ⲅ', &['Ⲅ']),
+    ('Ⲇ', &['ⲇ']),
+    ('ⲇ', &['Ⲇ']),
+    ('Ⲉ', &['ⲉ']),
+    ('ⲉ', &['Ⲉ']),
+    ('Ⲋ', &['ⲋ']),
+    ('ⲋ', &['Ⲋ']),
+    ('Ⲍ', &['ⲍ']),
+    ('ⲍ', &['Ⲍ']),
+    ('Ⲏ', &['ⲏ']),
+    ('ⲏ', &['Ⲏ']),
+    ('Ⲑ', &['ⲑ']),
+    ('ⲑ', &['Ⲑ']),
+    ('Ⲓ', &['ⲓ']),
+    ('ⲓ', &['Ⲓ']),
+    ('Ⲕ', &['ⲕ']),
+    ('ⲕ', &['Ⲕ']),
+    ('Ⲗ', &['ⲗ']),
+    ('ⲗ', &['Ⲗ']),
+    ('Ⲙ', &['ⲙ']),
+    ('ⲙ', &['Ⲙ']),
+    ('Ⲛ', &['ⲛ']),
+    ('ⲛ', &['Ⲛ']),
+    ('Ⲝ', &['ⲝ']),
+    ('ⲝ', &['Ⲝ']),
+    ('Ⲟ', &['ⲟ']),
+    ('ⲟ', &['Ⲟ']),
+    ('Ⲡ', &['ⲡ']),
+    ('ⲡ', &['Ⲡ']),
+    ('Ⲣ', &['ⲣ']),
+    ('ⲣ', &['Ⲣ']),
+    ('Ⲥ', &['ⲥ']),
+    ('ⲥ', &['Ⲥ']),
+    ('Ⲧ', &['ⲧ']),
+    ('ⲧ', &['Ⲧ']),
+    ('Ⲩ', &['ⲩ']),
+    ('ⲩ', &['Ⲩ']),
+    ('Ⲫ', &['ⲫ']),
+    ('ⲫ', &['Ⲫ']),
+    ('Ⲭ', &['ⲭ']),
+    ('ⲭ', &['Ⲭ']),
+    ('Ⲯ', &['ⲯ']),
+    ('ⲯ', &['Ⲯ']),
+    ('Ⲱ', &['ⲱ']),
+    ('ⲱ', &['Ⲱ']),
+    ('Ⲳ', &['ⲳ']),
+    ('ⲳ', &['Ⲳ']),
+    ('Ⲵ', &['ⲵ']),
+    ('ⲵ', &['Ⲵ']),
+    ('Ⲷ', &['ⲷ']),
+    ('ⲷ', &['Ⲷ']),
+    ('Ⲹ', &['ⲹ']),
+    ('ⲹ', &['Ⲹ']),
+    ('Ⲻ', &['ⲻ']),
+    ('ⲻ', &['Ⲻ']),
+    ('Ⲽ', &['ⲽ']),
+    ('ⲽ', &['Ⲽ']),
+    ('Ⲿ', &['ⲿ']),
+    ('ⲿ', &['Ⲿ']),
+    ('Ⳁ', &['ⳁ']),
+    ('ⳁ', &['Ⳁ']),
+    ('Ⳃ', &['ⳃ']),
+    ('ⳃ', &['Ⳃ']),
+    ('Ⳅ', &['ⳅ']),
+    ('ⳅ', &['Ⳅ']),
+    ('Ⳇ', &['ⳇ']),
+    ('ⳇ', &['Ⳇ']),
+    ('Ⳉ', &['ⳉ']),
+    ('ⳉ', &['Ⳉ']),
+    ('Ⳋ', &['ⳋ']),
+    ('ⳋ', &['Ⳋ']),
+    ('Ⳍ', &['ⳍ']),
+    ('ⳍ', &['Ⳍ']),
+    ('Ⳏ', &['ⳏ']),
+    ('ⳏ', &['Ⳏ']),
+    ('Ⳑ', &['ⳑ']),
+    ('ⳑ', &['Ⳑ']),
+    ('Ⳓ', &['ⳓ']),
+    ('ⳓ', &['Ⳓ']),
+    ('Ⳕ', &['ⳕ']),
+    ('ⳕ', &['Ⳕ']),
+    ('Ⳗ', &['ⳗ']),
+    ('ⳗ', &['Ⳗ']),
+    ('Ⳙ', &['ⳙ']),
+    ('ⳙ', &['Ⳙ']),
+    ('Ⳛ', &['ⳛ']),
+    ('ⳛ', &['Ⳛ']),
+    ('Ⳝ', &['ⳝ']),
+    ('ⳝ', &['Ⳝ']),
+    ('Ⳟ', &['ⳟ']),
+    ('ⳟ', &['Ⳟ']),
+    ('Ⳡ', &['ⳡ']),
+    ('ⳡ', &['Ⳡ']),
+    ('Ⳣ', &['ⳣ']),
+    ('ⳣ', &['Ⳣ']),
+    ('Ⳬ', &['ⳬ']),
+    ('ⳬ', &['Ⳬ']),
+    ('Ⳮ', &['ⳮ']),
+    ('ⳮ', &['Ⳮ']),
+    ('Ⳳ', &['ⳳ']),
+    ('ⳳ', &['Ⳳ']),
+    ('ⴀ', &['Ⴀ']),
+    ('ⴁ', &['Ⴁ']),
+    ('ⴂ', &['Ⴂ']),
+    ('ⴃ', &['Ⴃ']),
+    ('ⴄ', &['Ⴄ']),
+    ('ⴅ', &['Ⴅ']),
+    ('ⴆ', &['Ⴆ']),
+    ('ⴇ', &['Ⴇ']),
+    ('ⴈ', &['Ⴈ']),
+    ('ⴉ', &['Ⴉ']),
+    ('ⴊ', &['Ⴊ']),
+    ('ⴋ', &['Ⴋ']),
+    ('ⴌ', &['Ⴌ']),
+    ('ⴍ', &['Ⴍ']),
+    ('ⴎ', &['Ⴎ']),
+    ('ⴏ', &['Ⴏ']),
+    ('ⴐ', &['Ⴐ']),
+    ('ⴑ', &['Ⴑ']),
+    ('ⴒ', &['Ⴒ']),
+    ('ⴓ', &['Ⴓ']),
+    ('ⴔ', &['Ⴔ']),
+    ('ⴕ', &['Ⴕ']),
+    ('ⴖ', &['Ⴖ']),
+    ('ⴗ', &['Ⴗ']),
+    ('ⴘ', &['Ⴘ']),
+    ('ⴙ', &['Ⴙ']),
+    ('ⴚ', &['Ⴚ']),
+    ('ⴛ', &['Ⴛ']),
+    ('ⴜ', &['Ⴜ']),
+    ('ⴝ', &['Ⴝ']),
+    ('ⴞ', &['Ⴞ']),
+    ('ⴟ', &['Ⴟ']),
+    ('ⴠ', &['Ⴠ']),
+    ('ⴡ', &['Ⴡ']),
+    ('ⴢ', &['Ⴢ']),
+    ('ⴣ', &['Ⴣ']),
+    ('ⴤ', &['Ⴤ']),
+    ('ⴥ', &['Ⴥ']),
+    ('ⴧ', &['Ⴧ']),
+    ('ⴭ', &['Ⴭ']),
+    ('Ꙁ', &['ꙁ']),
+    ('ꙁ', &['Ꙁ']),
+    ('Ꙃ', &['ꙃ']),
+    ('ꙃ', &['Ꙃ']),
+    ('Ꙅ', &['ꙅ']),
+    ('ꙅ', &['Ꙅ']),
+    ('Ꙇ', &['ꙇ']),
+    ('ꙇ', &['Ꙇ']),
+    ('Ꙉ', &['ꙉ']),
+    ('ꙉ', &['Ꙉ']),
+    ('Ꙋ', &['ᲈ', 'ꙋ']),
+    ('ꙋ', &['ᲈ', 'Ꙋ']),
+    ('Ꙍ', &['ꙍ']),
+    ('ꙍ', &['Ꙍ']),
+    ('Ꙏ', &['ꙏ']),
+    ('ꙏ', &['Ꙏ']),
+    ('Ꙑ', &['ꙑ']),
+    ('ꙑ', &['Ꙑ']),
+    ('Ꙓ', &['ꙓ']),
+    ('ꙓ', &['Ꙓ']),
+    ('Ꙕ', &['ꙕ']),
+    ('ꙕ', &['Ꙕ']),
+    ('Ꙗ', &['ꙗ']),
+    ('ꙗ', &['Ꙗ']),
+    ('Ꙙ', &['ꙙ']),
+    ('ꙙ', &['Ꙙ']),
+    ('Ꙛ', &['ꙛ']),
+    ('ꙛ', &['Ꙛ']),
+    ('Ꙝ', &['ꙝ']),
+    ('ꙝ', &['Ꙝ']),
+    ('Ꙟ', &['ꙟ']),
+    ('ꙟ', &['Ꙟ']),
+    ('Ꙡ', &['ꙡ']),
+    ('ꙡ', &['Ꙡ']),
+    ('Ꙣ', &['ꙣ']),
+    ('ꙣ', &['Ꙣ']),
+    ('Ꙥ', &['ꙥ']),
+    ('ꙥ', &['Ꙥ']),
+    ('Ꙧ', &['ꙧ']),
+    ('ꙧ', &['Ꙧ']),
+    ('Ꙩ', &['ꙩ']),
+    ('ꙩ', &['Ꙩ']),
+    ('Ꙫ', &['ꙫ']),
+    ('ꙫ', &['Ꙫ']),
+    ('Ꙭ', &['ꙭ']),
+    ('ꙭ', &['Ꙭ']),
+    ('Ꚁ', &['ꚁ']),
+    ('ꚁ', &['Ꚁ']),
+    ('Ꚃ', &['ꚃ']),
+    ('ꚃ', &['Ꚃ']),
+    ('Ꚅ', &['ꚅ']),
+    ('ꚅ', &['Ꚅ']),
+    ('Ꚇ', &['ꚇ']),
+    ('ꚇ', &['Ꚇ']),
+    ('Ꚉ', &['ꚉ']),
+    ('ꚉ', &['Ꚉ']),
+    ('Ꚋ', &['ꚋ']),
+    ('ꚋ', &['Ꚋ']),
+    ('Ꚍ', &['ꚍ']),
+    ('ꚍ', &['Ꚍ']),
+    ('Ꚏ', &['ꚏ']),
+    ('ꚏ', &['Ꚏ']),
+    ('Ꚑ', &['ꚑ']),
+    ('ꚑ', &['Ꚑ']),
+    ('Ꚓ', &['ꚓ']),
+    ('ꚓ', &['Ꚓ']),
+    ('Ꚕ', &['ꚕ']),
+    ('ꚕ', &['Ꚕ']),
+    ('Ꚗ', &['ꚗ']),
+    ('ꚗ', &['Ꚗ']),
+    ('Ꚙ', &['ꚙ']),
+    ('ꚙ', &['Ꚙ']),
+    ('Ꚛ', &['ꚛ']),
+    ('ꚛ', &['Ꚛ']),
+    ('Ꜣ', &['ꜣ']),
+    ('ꜣ', &['Ꜣ']),
+    ('Ꜥ', &['ꜥ']),
+    ('ꜥ', &['Ꜥ']),
+    ('Ꜧ', &['ꜧ']),
+    ('ꜧ', &['Ꜧ']),
+    ('Ꜩ', &['ꜩ']),
+    ('ꜩ', &['Ꜩ']),
+    ('Ꜫ', &['ꜫ']),
+    ('ꜫ', &['Ꜫ']),
+    ('Ꜭ', &['ꜭ']),
+    ('ꜭ', &['Ꜭ']),
+    ('Ꜯ', &['ꜯ']),
+    ('ꜯ', &['Ꜯ']),
+    ('Ꜳ', &['ꜳ']),
+    ('ꜳ', &['Ꜳ']),
+    ('Ꜵ', &['ꜵ']),
+    ('ꜵ', &['Ꜵ']),
+    ('Ꜷ', &['ꜷ']),
+    ('ꜷ', &['Ꜷ']),
+    ('Ꜹ', &['ꜹ']),
+    ('ꜹ', &['Ꜹ']),
+    ('Ꜻ', &['ꜻ']),
+    ('ꜻ', &['Ꜻ']),
+    ('Ꜽ', &['ꜽ']),
+    ('ꜽ', &['Ꜽ']),
+    ('Ꜿ', &['ꜿ']),
+    ('ꜿ', &['Ꜿ']),
+    ('Ꝁ', &['ꝁ']),
+    ('ꝁ', &['Ꝁ']),
+    ('Ꝃ', &['ꝃ']),
+    ('ꝃ', &['Ꝃ']),
+    ('Ꝅ', &['ꝅ']),
+    ('ꝅ', &['Ꝅ']),
+    ('Ꝇ', &['ꝇ']),
+    ('ꝇ', &['Ꝇ']),
+    ('Ꝉ', &['ꝉ']),
+    ('ꝉ', &['Ꝉ']),
+    ('Ꝋ', &['ꝋ']),
+    ('ꝋ', &['Ꝋ']),
+    ('Ꝍ', &['ꝍ']),
+    ('ꝍ', &['Ꝍ']),
+    ('Ꝏ', &['ꝏ']),
+    ('ꝏ', &['Ꝏ']),
+    ('Ꝑ', &['ꝑ']),
+    ('ꝑ', &['Ꝑ']),
+    ('Ꝓ', &['ꝓ']),
+    ('ꝓ', &['Ꝓ']),
+    ('Ꝕ', &['ꝕ']),
+    ('ꝕ', &['Ꝕ']),
+    ('Ꝗ', &['ꝗ']),
+    ('ꝗ', &['Ꝗ']),
+    ('Ꝙ', &['ꝙ']),
+    ('ꝙ', &['Ꝙ']),
+    ('Ꝛ', &['ꝛ']),
+    ('ꝛ', &['Ꝛ']),
+    ('Ꝝ', &['ꝝ']),
+    ('ꝝ', &['Ꝝ']),
+    ('Ꝟ', &['ꝟ']),
+    ('ꝟ', &['Ꝟ']),
+    ('Ꝡ', &['ꝡ']),
+    ('ꝡ', &['Ꝡ']),
+    ('Ꝣ', &['ꝣ']),
+    ('ꝣ', &['Ꝣ']),
+    ('Ꝥ', &['ꝥ']),
+    ('ꝥ', &['Ꝥ']),
+    ('Ꝧ', &['ꝧ']),
+    ('ꝧ', &['Ꝧ']),
+    ('Ꝩ', &['ꝩ']),
+    ('ꝩ', &['Ꝩ']),
+    ('Ꝫ', &['ꝫ']),
+    ('ꝫ', &['Ꝫ']),
+    ('Ꝭ', &['ꝭ']),
+    ('ꝭ', &['Ꝭ']),
+    ('Ꝯ', &['ꝯ']),
+    ('ꝯ', &['Ꝯ']),
+    ('Ꝺ', &['ꝺ']),
+    ('ꝺ', &['Ꝺ']),
+    ('Ꝼ', &['ꝼ']),
+    ('ꝼ', &['Ꝼ']),
+    ('Ᵹ', &['ᵹ']),
+    ('Ꝿ', &['ꝿ']),
+    ('ꝿ', &['Ꝿ']),
+    ('Ꞁ', &['ꞁ']),
+    ('ꞁ', &['Ꞁ']),
+    ('Ꞃ', &['ꞃ']),
+    ('ꞃ', &['Ꞃ']),
+    ('Ꞅ', &['ꞅ']),
+    ('ꞅ', &['Ꞅ']),
+    ('Ꞇ', &['ꞇ']),
+    ('ꞇ', &['Ꞇ']),
+    ('Ꞌ', &['ꞌ']),
+    ('ꞌ', &['Ꞌ']),
+    ('Ɥ', &['ɥ']),
+    ('Ꞑ', &['ꞑ']),
+    ('ꞑ', &['Ꞑ']),
+    ('Ꞓ', &['ꞓ']),
+    ('ꞓ', &['Ꞓ']),
+    ('ꞔ', &['Ꞔ']),
+    ('Ꞗ', &['ꞗ']),
+    ('ꞗ', &['Ꞗ']),
+    ('Ꞙ', &['ꞙ']),
+    ('ꞙ', &['Ꞙ']),
+    ('Ꞛ', &['ꞛ']),
+    ('ꞛ', &['Ꞛ']),
+    ('Ꞝ', &['ꞝ']),
+    ('ꞝ', &['Ꞝ']),
+    ('Ꞟ', &['ꞟ']),
+    ('ꞟ', &['Ꞟ']),
+    ('Ꞡ', &['ꞡ']),
+    ('ꞡ', &['Ꞡ']),
+    ('Ꞣ', &['ꞣ']),
+    ('ꞣ', &['Ꞣ']),
+    ('Ꞥ', &['ꞥ']),
+    ('ꞥ', &['Ꞥ']),
+    ('Ꞧ', &['ꞧ']),
+    ('ꞧ', &['Ꞧ']),
+    ('Ꞩ', &['ꞩ']),
+    ('ꞩ', &['Ꞩ']),
+    ('Ɦ', &['ɦ']),
+    ('Ɜ', &['ɜ']),
+    ('Ɡ', &['ɡ']),
+    ('Ɬ', &['ɬ']),
+    ('Ɪ', &['ɪ']),
+    ('Ʞ', &['ʞ']),
+    ('Ʇ', &['ʇ']),
+    ('Ʝ', &['ʝ']),
+    ('Ꭓ', &['ꭓ']),
+    ('Ꞵ', &['ꞵ']),
+    ('ꞵ', &['Ꞵ']),
+    ('Ꞷ', &['ꞷ']),
+    ('ꞷ', &['Ꞷ']),
+    ('Ꞹ', &['ꞹ']),
+    ('ꞹ', &['Ꞹ']),
+    ('Ꞻ', &['ꞻ']),
+    ('ꞻ', &['Ꞻ']),
+    ('Ꞽ', &['ꞽ']),
+    ('ꞽ', &['Ꞽ']),
+    ('Ꞿ', &['ꞿ']),
+    ('ꞿ', &['Ꞿ']),
+    ('Ꟃ', &['ꟃ']),
+    ('ꟃ', &['Ꟃ']),
+    ('Ꞔ', &['ꞔ']),
+    ('Ʂ', &['ʂ']),
+    ('Ᶎ', &['ᶎ']),
+    ('\u{a7c7}', &['\u{a7c8}']),
+    ('\u{a7c8}', &['\u{a7c7}']),
+    ('\u{a7c9}', &['\u{a7ca}']),
+    ('\u{a7ca}', &['\u{a7c9}']),
+    ('\u{a7f5}', &['\u{a7f6}']),
+    ('\u{a7f6}', &['\u{a7f5}']),
+    ('ꭓ', &['Ꭓ']),
+    ('ꭰ', &['Ꭰ']),
+    ('ꭱ', &['Ꭱ']),
+    ('ꭲ', &['Ꭲ']),
+    ('ꭳ', &['Ꭳ']),
+    ('ꭴ', &['Ꭴ']),
+    ('ꭵ', &['Ꭵ']),
+    ('ꭶ', &['Ꭶ']),
+    ('ꭷ', &['Ꭷ']),
+    ('ꭸ', &['Ꭸ']),
+    ('ꭹ', &['Ꭹ']),
+    ('ꭺ', &['Ꭺ']),
+    ('ꭻ', &['Ꭻ']),
+    ('ꭼ', &['Ꭼ']),
+    ('ꭽ', &['Ꭽ']),
+    ('ꭾ', &['Ꭾ']),
+    ('ꭿ', &['Ꭿ']),
+    ('ꮀ', &['Ꮀ']),
+    ('ꮁ', &['Ꮁ']),
+    ('ꮂ', &['Ꮂ']),
+    ('ꮃ', &['Ꮃ']),
+    ('ꮄ', &['Ꮄ']),
+    ('ꮅ', &['Ꮅ']),
+    ('ꮆ', &['Ꮆ']),
+    ('ꮇ', &['Ꮇ']),
+    ('ꮈ', &['Ꮈ']),
+    ('ꮉ', &['Ꮉ']),
+    ('ꮊ', &['Ꮊ']),
+    ('ꮋ', &['Ꮋ']),
+    ('ꮌ', &['Ꮌ']),
+    ('ꮍ', &['Ꮍ']),
+    ('ꮎ', &['Ꮎ']),
+    ('ꮏ', &['Ꮏ']),
+    ('ꮐ', &['Ꮐ']),
+    ('ꮑ', &['Ꮑ']),
+    ('ꮒ', &['Ꮒ']),
+    ('ꮓ', &['Ꮓ']),
+    ('ꮔ', &['Ꮔ']),
+    ('ꮕ', &['Ꮕ']),
+    ('ꮖ', &['Ꮖ']),
+    ('ꮗ', &['Ꮗ']),
+    ('ꮘ', &['Ꮘ']),
+    ('ꮙ', &['Ꮙ']),
+    ('ꮚ', &['Ꮚ']),
+    ('ꮛ', &['Ꮛ']),
+    ('ꮜ', &['Ꮜ']),
+    ('ꮝ', &['Ꮝ']),
+    ('ꮞ', &['Ꮞ']),
+    ('ꮟ', &['Ꮟ']),
+    ('ꮠ', &['Ꮠ']),
+    ('ꮡ', &['Ꮡ']),
+    ('ꮢ', &['Ꮢ']),
+    ('ꮣ', &['Ꮣ']),
+    ('ꮤ', &['Ꮤ']),
+    ('ꮥ', &['Ꮥ']),
+    ('ꮦ', &['Ꮦ']),
+    ('ꮧ', &['Ꮧ']),
+    ('ꮨ', &['Ꮨ']),
+    ('ꮩ', &['Ꮩ']),
+    ('ꮪ', &['Ꮪ']),
+    ('ꮫ', &['Ꮫ']),
+    ('ꮬ', &['Ꮬ']),
+    ('ꮭ', &['Ꮭ']),
+    ('ꮮ', &['Ꮮ']),
+    ('ꮯ', &['Ꮯ']),
+    ('ꮰ', &['Ꮰ']),
+    ('ꮱ', &['Ꮱ']),
+    ('ꮲ', &['Ꮲ']),
+    ('ꮳ', &['Ꮳ']),
+    ('ꮴ', &['Ꮴ']),
+    ('ꮵ', &['Ꮵ']),
+    ('ꮶ', &['Ꮶ']),
+    ('ꮷ', &['Ꮷ']),
+    ('ꮸ', &['Ꮸ']),
+    ('ꮹ', &['Ꮹ']),
+    ('ꮺ', &['Ꮺ']),
+    ('ꮻ', &['Ꮻ']),
+    ('ꮼ', &['Ꮼ']),
+    ('ꮽ', &['Ꮽ']),
+    ('ꮾ', &['Ꮾ']),
+    ('ꮿ', &['Ꮿ']),
+    ('A', &['a']),
+    ('B', &['b']),
+    ('C', &['c']),
+    ('D', &['d']),
+    ('E', &['e']),
+    ('F', &['f']),
+    ('G', &['g']),
+    ('H', &['h']),
+    ('I', &['i']),
+    ('J', &['j']),
+    ('K', &['k']),
+    ('L', &['l']),
+    ('M', &['m']),
+    ('N', &['n']),
+    ('O', &['o']),
+    ('P', &['p']),
+    ('Q', &['q']),
+    ('R', &['r']),
+    ('S', &['s']),
+    ('T', &['t']),
+    ('U', &['u']),
+    ('V', &['v']),
+    ('W', &['w']),
+    ('X', &['x']),
+    ('Y', &['y']),
+    ('Z', &['z']),
+    ('a', &['A']),
+    ('b', &['B']),
+    ('c', &['C']),
+    ('d', &['D']),
+    ('e', &['E']),
+    ('f', &['F']),
+    ('g', &['G']),
+    ('h', &['H']),
+    ('i', &['I']),
+    ('j', &['J']),
+    ('k', &['K']),
+    ('l', &['L']),
+    ('m', &['M']),
+    ('n', &['N']),
+    ('o', &['O']),
+    ('p', &['P']),
+    ('q', &['Q']),
+    ('r', &['R']),
+    ('s', &['S']),
+    ('t', &['T']),
+    ('u', &['U']),
+    ('v', &['V']),
+    ('w', &['W']),
+    ('x', &['X']),
+    ('y', &['Y']),
+    ('z', &['Z']),
+    ('𐐀', &['𐐨']),
+    ('𐐁', &['𐐩']),
+    ('𐐂', &['𐐪']),
+    ('𐐃', &['𐐫']),
+    ('𐐄', &['𐐬']),
+    ('𐐅', &['𐐭']),
+    ('𐐆', &['𐐮']),
+    ('𐐇', &['𐐯']),
+    ('𐐈', &['𐐰']),
+    ('𐐉', &['𐐱']),
+    ('𐐊', &['𐐲']),
+    ('𐐋', &['𐐳']),
+    ('𐐌', &['𐐴']),
+    ('𐐍', &['𐐵']),
+    ('𐐎', &['𐐶']),
+    ('𐐏', &['𐐷']),
+    ('𐐐', &['𐐸']),
+    ('𐐑', &['𐐹']),
+    ('𐐒', &['𐐺']),
+    ('𐐓', &['𐐻']),
+    ('𐐔', &['𐐼']),
+    ('𐐕', &['𐐽']),
+    ('𐐖', &['𐐾']),
+    ('𐐗', &['𐐿']),
+    ('𐐘', &['𐑀']),
+    ('𐐙', &['𐑁']),
+    ('𐐚', &['𐑂']),
+    ('𐐛', &['𐑃']),
+    ('𐐜', &['𐑄']),
+    ('𐐝', &['𐑅']),
+    ('𐐞', &['𐑆']),
+    ('𐐟', &['𐑇']),
+    ('𐐠', &['𐑈']),
+    ('𐐡', &['𐑉']),
+    ('𐐢', &['𐑊']),
+    ('𐐣', &['𐑋']),
+    ('𐐤', &['𐑌']),
+    ('𐐥', &['𐑍']),
+    ('𐐦', &['𐑎']),
+    ('𐐧', &['𐑏']),
+    ('𐐨', &['𐐀']),
+    ('𐐩', &['𐐁']),
+    ('𐐪', &['𐐂']),
+    ('𐐫', &['𐐃']),
+    ('𐐬', &['𐐄']),
+    ('𐐭', &['𐐅']),
+    ('𐐮', &['𐐆']),
+    ('𐐯', &['𐐇']),
+    ('𐐰', &['𐐈']),
+    ('𐐱', &['𐐉']),
+    ('𐐲', &['𐐊']),
+    ('𐐳', &['𐐋']),
+    ('𐐴', &['𐐌']),
+    ('𐐵', &['𐐍']),
+    ('𐐶', &['𐐎']),
+    ('𐐷', &['𐐏']),
+    ('𐐸', &['𐐐']),
+    ('𐐹', &['𐐑']),
+    ('𐐺', &['𐐒']),
+    ('𐐻', &['𐐓']),
+    ('𐐼', &['𐐔']),
+    ('𐐽', &['𐐕']),
+    ('𐐾', &['𐐖']),
+    ('𐐿', &['𐐗']),
+    ('𐑀', &['𐐘']),
+    ('𐑁', &['𐐙']),
+    ('𐑂', &['𐐚']),
+    ('𐑃', &['𐐛']),
+    ('𐑄', &['𐐜']),
+    ('𐑅', &['𐐝']),
+    ('𐑆', &['𐐞']),
+    ('𐑇', &['𐐟']),
+    ('𐑈', &['𐐠']),
+    ('𐑉', &['𐐡']),
+    ('𐑊', &['𐐢']),
+    ('𐑋', &['𐐣']),
+    ('𐑌', &['𐐤']),
+    ('𐑍', &['𐐥']),
+    ('𐑎', &['𐐦']),
+    ('𐑏', &['𐐧']),
+    ('𐒰', &['𐓘']),
+    ('𐒱', &['𐓙']),
+    ('𐒲', &['𐓚']),
+    ('𐒳', &['𐓛']),
+    ('𐒴', &['𐓜']),
+    ('𐒵', &['𐓝']),
+    ('𐒶', &['𐓞']),
+    ('𐒷', &['𐓟']),
+    ('𐒸', &['𐓠']),
+    ('𐒹', &['𐓡']),
+    ('𐒺', &['𐓢']),
+    ('𐒻', &['𐓣']),
+    ('𐒼', &['𐓤']),
+    ('𐒽', &['𐓥']),
+    ('𐒾', &['𐓦']),
+    ('𐒿', &['𐓧']),
+    ('𐓀', &['𐓨']),
+    ('𐓁', &['𐓩']),
+    ('𐓂', &['𐓪']),
+    ('𐓃', &['𐓫']),
+    ('𐓄', &['𐓬']),
+    ('𐓅', &['𐓭']),
+    ('𐓆', &['𐓮']),
+    ('𐓇', &['𐓯']),
+    ('𐓈', &['𐓰']),
+    ('𐓉', &['𐓱']),
+    ('𐓊', &['𐓲']),
+    ('𐓋', &['𐓳']),
+    ('𐓌', &['𐓴']),
+    ('𐓍', &['𐓵']),
+    ('𐓎', &['𐓶']),
+    ('𐓏', &['𐓷']),
+    ('𐓐', &['𐓸']),
+    ('𐓑', &['𐓹']),
+    ('𐓒', &['𐓺']),
+    ('𐓓', &['𐓻']),
+    ('𐓘', &['𐒰']),
+    ('𐓙', &['𐒱']),
+    ('𐓚', &['𐒲']),
+    ('𐓛', &['𐒳']),
+    ('𐓜', &['𐒴']),
+    ('𐓝', &['𐒵']),
+    ('𐓞', &['𐒶']),
+    ('𐓟', &['𐒷']),
+    ('𐓠', &['𐒸']),
+    ('𐓡', &['𐒹']),
+    ('𐓢', &['𐒺']),
+    ('𐓣', &['𐒻']),
+    ('𐓤', &['𐒼']),
+    ('𐓥', &['𐒽']),
+    ('𐓦', &['𐒾']),
+    ('𐓧', &['𐒿']),
+    ('𐓨', &['𐓀']),
+    ('𐓩', &['𐓁']),
+    ('𐓪', &['𐓂']),
+    ('𐓫', &['𐓃']),
+    ('𐓬', &['𐓄']),
+    ('𐓭', &['𐓅']),
+    ('𐓮', &['𐓆']),
+    ('𐓯', &['𐓇']),
+    ('𐓰', &['𐓈']),
+    ('𐓱', &['𐓉']),
+    ('𐓲', &['𐓊']),
+    ('𐓳', &['𐓋']),
+    ('𐓴', &['𐓌']),
+    ('𐓵', &['𐓍']),
+    ('𐓶', &['𐓎']),
+    ('𐓷', &['𐓏']),
+    ('𐓸', &['𐓐']),
+    ('𐓹', &['𐓑']),
+    ('𐓺', &['𐓒']),
+    ('𐓻', &['𐓓']),
+    ('𐲀', &['𐳀']),
+    ('𐲁', &['𐳁']),
+    ('𐲂', &['𐳂']),
+    ('𐲃', &['𐳃']),
+    ('𐲄', &['𐳄']),
+    ('𐲅', &['𐳅']),
+    ('𐲆', &['𐳆']),
+    ('𐲇', &['𐳇']),
+    ('𐲈', &['𐳈']),
+    ('𐲉', &['𐳉']),
+    ('𐲊', &['𐳊']),
+    ('𐲋', &['𐳋']),
+    ('𐲌', &['𐳌']),
+    ('𐲍', &['𐳍']),
+    ('𐲎', &['𐳎']),
+    ('𐲏', &['𐳏']),
+    ('𐲐', &['𐳐']),
+    ('𐲑', &['𐳑']),
+    ('𐲒', &['𐳒']),
+    ('𐲓', &['𐳓']),
+    ('𐲔', &['𐳔']),
+    ('𐲕', &['𐳕']),
+    ('𐲖', &['𐳖']),
+    ('𐲗', &['𐳗']),
+    ('𐲘', &['𐳘']),
+    ('𐲙', &['𐳙']),
+    ('𐲚', &['𐳚']),
+    ('𐲛', &['𐳛']),
+    ('𐲜', &['𐳜']),
+    ('𐲝', &['𐳝']),
+    ('𐲞', &['𐳞']),
+    ('𐲟', &['𐳟']),
+    ('𐲠', &['𐳠']),
+    ('𐲡', &['𐳡']),
+    ('𐲢', &['𐳢']),
+    ('𐲣', &['𐳣']),
+    ('𐲤', &['𐳤']),
+    ('𐲥', &['𐳥']),
+    ('𐲦', &['𐳦']),
+    ('𐲧', &['𐳧']),
+    ('𐲨', &['𐳨']),
+    ('𐲩', &['𐳩']),
+    ('𐲪', &['𐳪']),
+    ('𐲫', &['𐳫']),
+    ('𐲬', &['𐳬']),
+    ('𐲭', &['𐳭']),
+    ('𐲮', &['𐳮']),
+    ('𐲯', &['𐳯']),
+    ('𐲰', &['𐳰']),
+    ('𐲱', &['𐳱']),
+    ('𐲲', &['𐳲']),
+    ('𐳀', &['𐲀']),
+    ('𐳁', &['𐲁']),
+    ('𐳂', &['𐲂']),
+    ('𐳃', &['𐲃']),
+    ('𐳄', &['𐲄']),
+    ('𐳅', &['𐲅']),
+    ('𐳆', &['𐲆']),
+    ('𐳇', &['𐲇']),
+    ('𐳈', &['𐲈']),
+    ('𐳉', &['𐲉']),
+    ('𐳊', &['𐲊']),
+    ('𐳋', &['𐲋']),
+    ('𐳌', &['𐲌']),
+    ('𐳍', &['𐲍']),
+    ('𐳎', &['𐲎']),
+    ('𐳏', &['𐲏']),
+    ('𐳐', &['𐲐']),
+    ('𐳑', &['𐲑']),
+    ('𐳒', &['𐲒']),
+    ('𐳓', &['𐲓']),
+    ('𐳔', &['𐲔']),
+    ('𐳕', &['𐲕']),
+    ('𐳖', &['𐲖']),
+    ('𐳗', &['𐲗']),
+    ('𐳘', &['𐲘']),
+    ('𐳙', &['𐲙']),
+    ('𐳚', &['𐲚']),
+    ('𐳛', &['𐲛']),
+    ('𐳜', &['𐲜']),
+    ('𐳝', &['𐲝']),
+    ('𐳞', &['𐲞']),
+    ('𐳟', &['𐲟']),
+    ('𐳠', &['𐲠']),
+    ('𐳡', &['𐲡']),
+    ('𐳢', &['𐲢']),
+    ('𐳣', &['𐲣']),
+    ('𐳤', &['𐲤']),
+    ('𐳥', &['𐲥']),
+    ('𐳦', &['𐲦']),
+    ('𐳧', &['𐲧']),
+    ('𐳨', &['𐲨']),
+    ('𐳩', &['𐲩']),
+    ('𐳪', &['𐲪']),
+    ('𐳫', &['𐲫']),
+    ('𐳬', &['𐲬']),
+    ('𐳭', &['𐲭']),
+    ('𐳮', &['𐲮']),
+    ('𐳯', &['𐲯']),
+    ('𐳰', &['𐲰']),
+    ('𐳱', &['𐲱']),
+    ('𐳲', &['𐲲']),
+    ('𑢠', &['𑣀']),
+    ('𑢡', &['𑣁']),
+    ('𑢢', &['𑣂']),
+    ('𑢣', &['𑣃']),
+    ('𑢤', &['𑣄']),
+    ('𑢥', &['𑣅']),
+    ('𑢦', &['𑣆']),
+    ('𑢧', &['𑣇']),
+    ('𑢨', &['𑣈']),
+    ('𑢩', &['𑣉']),
+    ('𑢪', &['𑣊']),
+    ('𑢫', &['𑣋']),
+    ('𑢬', &['𑣌']),
+    ('𑢭', &['𑣍']),
+    ('𑢮', &['𑣎']),
+    ('𑢯', &['𑣏']),
+    ('𑢰', &['𑣐']),
+    ('𑢱', &['𑣑']),
+    ('𑢲', &['𑣒']),
+    ('𑢳', &['𑣓']),
+    ('𑢴', &['𑣔']),
+    ('𑢵', &['𑣕']),
+    ('𑢶', &['𑣖']),
+    ('𑢷', &['𑣗']),
+    ('𑢸', &['𑣘']),
+    ('𑢹', &['𑣙']),
+    ('𑢺', &['𑣚']),
+    ('𑢻', &['𑣛']),
+    ('𑢼', &['𑣜']),
+    ('𑢽', &['𑣝']),
+    ('𑢾', &['𑣞']),
+    ('𑢿', &['𑣟']),
+    ('𑣀', &['𑢠']),
+    ('𑣁', &['𑢡']),
+    ('𑣂', &['𑢢']),
+    ('𑣃', &['𑢣']),
+    ('𑣄', &['𑢤']),
+    ('𑣅', &['𑢥']),
+    ('𑣆', &['𑢦']),
+    ('𑣇', &['𑢧']),
+    ('𑣈', &['𑢨']),
+    ('𑣉', &['𑢩']),
+    ('𑣊', &['𑢪']),
+    ('𑣋', &['𑢫']),
+    ('𑣌', &['𑢬']),
+    ('𑣍', &['𑢭']),
+    ('𑣎', &['𑢮']),
+    ('𑣏', &['𑢯']),
+    ('𑣐', &['𑢰']),
+    ('𑣑', &['𑢱']),
+    ('𑣒', &['𑢲']),
+    ('𑣓', &['𑢳']),
+    ('𑣔', &['𑢴']),
+    ('𑣕', &['𑢵']),
+    ('𑣖', &['𑢶']),
+    ('𑣗', &['𑢷']),
+    ('𑣘', &['𑢸']),
+    ('𑣙', &['𑢹']),
+    ('𑣚', &['𑢺']),
+    ('𑣛', &['𑢻']),
+    ('𑣜', &['𑢼']),
+    ('𑣝', &['𑢽']),
+    ('𑣞', &['𑢾']),
+    ('𑣟', &['𑢿']),
+    ('𖹀', &['𖹠']),
+    ('𖹁', &['𖹡']),
+    ('𖹂', &['𖹢']),
+    ('𖹃', &['𖹣']),
+    ('𖹄', &['𖹤']),
+    ('𖹅', &['𖹥']),
+    ('𖹆', &['𖹦']),
+    ('𖹇', &['𖹧']),
+    ('𖹈', &['𖹨']),
+    ('𖹉', &['𖹩']),
+    ('𖹊', &['𖹪']),
+    ('𖹋', &['𖹫']),
+    ('𖹌', &['𖹬']),
+    ('𖹍', &['𖹭']),
+    ('𖹎', &['𖹮']),
+    ('𖹏', &['𖹯']),
+    ('𖹐', &['𖹰']),
+    ('𖹑', &['𖹱']),
+    ('𖹒', &['𖹲']),
+    ('𖹓', &['𖹳']),
+    ('𖹔', &['𖹴']),
+    ('𖹕', &['𖹵']),
+    ('𖹖', &['𖹶']),
+    ('𖹗', &['𖹷']),
+    ('𖹘', &['𖹸']),
+    ('𖹙', &['𖹹']),
+    ('𖹚', &['𖹺']),
+    ('𖹛', &['𖹻']),
+    ('𖹜', &['𖹼']),
+    ('𖹝', &['𖹽']),
+    ('𖹞', &['𖹾']),
+    ('𖹟', &['𖹿']),
+    ('𖹠', &['𖹀']),
+    ('𖹡', &['𖹁']),
+    ('𖹢', &['𖹂']),
+    ('𖹣', &['𖹃']),
+    ('𖹤', &['𖹄']),
+    ('𖹥', &['𖹅']),
+    ('𖹦', &['𖹆']),
+    ('𖹧', &['𖹇']),
+    ('𖹨', &['𖹈']),
+    ('𖹩', &['𖹉']),
+    ('𖹪', &['𖹊']),
+    ('𖹫', &['𖹋']),
+    ('𖹬', &['𖹌']),
+    ('𖹭', &['𖹍']),
+    ('𖹮', &['𖹎']),
+    ('𖹯', &['𖹏']),
+    ('𖹰', &['𖹐']),
+    ('𖹱', &['𖹑']),
+    ('𖹲', &['𖹒']),
+    ('𖹳', &['𖹓']),
+    ('𖹴', &['𖹔']),
+    ('𖹵', &['𖹕']),
+    ('𖹶', &['𖹖']),
+    ('𖹷', &['𖹗']),
+    ('𖹸', &['𖹘']),
+    ('𖹹', &['𖹙']),
+    ('𖹺', &['𖹚']),
+    ('𖹻', &['𖹛']),
+    ('𖹼', &['𖹜']),
+    ('𖹽', &['𖹝']),
+    ('𖹾', &['𖹞']),
+    ('𖹿', &['𖹟']),
+    ('𞤀', &['𞤢']),
+    ('𞤁', &['𞤣']),
+    ('𞤂', &['𞤤']),
+    ('𞤃', &['𞤥']),
+    ('𞤄', &['𞤦']),
+    ('𞤅', &['𞤧']),
+    ('𞤆', &['𞤨']),
+    ('𞤇', &['𞤩']),
+    ('𞤈', &['𞤪']),
+    ('𞤉', &['𞤫']),
+    ('𞤊', &['𞤬']),
+    ('𞤋', &['𞤭']),
+    ('𞤌', &['𞤮']),
+    ('𞤍', &['𞤯']),
+    ('𞤎', &['𞤰']),
+    ('𞤏', &['𞤱']),
+    ('𞤐', &['𞤲']),
+    ('𞤑', &['𞤳']),
+    ('𞤒', &['𞤴']),
+    ('𞤓', &['𞤵']),
+    ('𞤔', &['𞤶']),
+    ('𞤕', &['𞤷']),
+    ('𞤖', &['𞤸']),
+    ('𞤗', &['𞤹']),
+    ('𞤘', &['𞤺']),
+    ('𞤙', &['𞤻']),
+    ('𞤚', &['𞤼']),
+    ('𞤛', &['𞤽']),
+    ('𞤜', &['𞤾']),
+    ('𞤝', &['𞤿']),
+    ('𞤞', &['𞥀']),
+    ('𞤟', &['𞥁']),
+    ('𞤠', &['𞥂']),
+    ('𞤡', &['𞥃']),
+    ('𞤢', &['𞤀']),
+    ('𞤣', &['𞤁']),
+    ('𞤤', &['𞤂']),
+    ('𞤥', &['𞤃']),
+    ('𞤦', &['𞤄']),
+    ('𞤧', &['𞤅']),
+    ('𞤨', &['𞤆']),
+    ('𞤩', &['𞤇']),
+    ('𞤪', &['𞤈']),
+    ('𞤫', &['𞤉']),
+    ('𞤬', &['𞤊']),
+    ('𞤭', &['𞤋']),
+    ('𞤮', &['𞤌']),
+    ('𞤯', &['𞤍']),
+    ('𞤰', &['𞤎']),
+    ('𞤱', &['𞤏']),
+    ('𞤲', &['𞤐']),
+    ('𞤳', &['𞤑']),
+    ('𞤴', &['𞤒']),
+    ('𞤵', &['𞤓']),
+    ('𞤶', &['𞤔']),
+    ('𞤷', &['𞤕']),
+    ('𞤸', &['𞤖']),
+    ('𞤹', &['𞤗']),
+    ('𞤺', &['𞤘']),
+    ('𞤻', &['𞤙']),
+    ('𞤼', &['𞤚']),
+    ('𞤽', &['𞤛']),
+    ('𞤾', &['𞤜']),
+    ('𞤿', &['𞤝']),
+    ('𞥀', &['𞤞']),
+    ('𞥁', &['𞤟']),
+    ('𞥂', &['𞤠']),
+    ('𞥃', &['𞤡']),
+];
diff --git a/src/unicode_tables/general_category.rs b/src/unicode_tables/general_category.rs
new file mode 100644
index 0000000..f2f9cef
--- /dev/null
+++ b/src/unicode_tables/general_category.rs
@@ -0,0 +1,6305 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate general-category ucd-13.0.0 --chars --exclude surrogate
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("Cased_Letter", CASED_LETTER),
+    ("Close_Punctuation", CLOSE_PUNCTUATION),
+    ("Connector_Punctuation", CONNECTOR_PUNCTUATION),
+    ("Control", CONTROL),
+    ("Currency_Symbol", CURRENCY_SYMBOL),
+    ("Dash_Punctuation", DASH_PUNCTUATION),
+    ("Decimal_Number", DECIMAL_NUMBER),
+    ("Enclosing_Mark", ENCLOSING_MARK),
+    ("Final_Punctuation", FINAL_PUNCTUATION),
+    ("Format", FORMAT),
+    ("Initial_Punctuation", INITIAL_PUNCTUATION),
+    ("Letter", LETTER),
+    ("Letter_Number", LETTER_NUMBER),
+    ("Line_Separator", LINE_SEPARATOR),
+    ("Lowercase_Letter", LOWERCASE_LETTER),
+    ("Mark", MARK),
+    ("Math_Symbol", MATH_SYMBOL),
+    ("Modifier_Letter", MODIFIER_LETTER),
+    ("Modifier_Symbol", MODIFIER_SYMBOL),
+    ("Nonspacing_Mark", NONSPACING_MARK),
+    ("Number", NUMBER),
+    ("Open_Punctuation", OPEN_PUNCTUATION),
+    ("Other", OTHER),
+    ("Other_Letter", OTHER_LETTER),
+    ("Other_Number", OTHER_NUMBER),
+    ("Other_Punctuation", OTHER_PUNCTUATION),
+    ("Other_Symbol", OTHER_SYMBOL),
+    ("Paragraph_Separator", PARAGRAPH_SEPARATOR),
+    ("Private_Use", PRIVATE_USE),
+    ("Punctuation", PUNCTUATION),
+    ("Separator", SEPARATOR),
+    ("Space_Separator", SPACE_SEPARATOR),
+    ("Spacing_Mark", SPACING_MARK),
+    ("Symbol", SYMBOL),
+    ("Titlecase_Letter", TITLECASE_LETTER),
+    ("Unassigned", UNASSIGNED),
+    ("Uppercase_Letter", UPPERCASE_LETTER),
+];
+
+pub const CASED_LETTER: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('µ', 'µ'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ƺ'),
+    ('Ƽ', 'ƿ'),
+    ('DŽ', 'ʓ'),
+    ('ʕ', 'ʯ'),
+    ('Ͱ', 'ͳ'),
+    ('Ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՠ', 'ֈ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᴀ', 'ᴫ'),
+    ('ᵫ', 'ᵷ'),
+    ('ᵹ', 'ᶚ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℴ'),
+    ('ℹ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ↄ', 'ↄ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⱻ'),
+    ('Ȿ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('Ꙁ', 'ꙭ'),
+    ('Ꚁ', 'ꚛ'),
+    ('Ꜣ', 'ꝯ'),
+    ('ꝱ', 'ꞇ'),
+    ('Ꞌ', 'ꞎ'),
+    ('Ꞑ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', '\u{a7f6}'),
+    ('ꟺ', 'ꟺ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭠ', '\u{ab68}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('𐐀', '𐑏'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𑢠', '𑣟'),
+    ('𖹀', '𖹿'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞤀', '𞥃'),
+];
+
+pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[
+    (')', ')'),
+    (']', ']'),
+    ('}', '}'),
+    ('༻', '༻'),
+    ('༽', '༽'),
+    ('᚜', '᚜'),
+    ('⁆', '⁆'),
+    ('⁾', '⁾'),
+    ('₎', '₎'),
+    ('⌉', '⌉'),
+    ('⌋', '⌋'),
+    ('⟩', '⟩'),
+    ('❩', '❩'),
+    ('❫', '❫'),
+    ('❭', '❭'),
+    ('❯', '❯'),
+    ('❱', '❱'),
+    ('❳', '❳'),
+    ('❵', '❵'),
+    ('⟆', '⟆'),
+    ('⟧', '⟧'),
+    ('⟩', '⟩'),
+    ('⟫', '⟫'),
+    ('⟭', '⟭'),
+    ('⟯', '⟯'),
+    ('⦄', '⦄'),
+    ('⦆', '⦆'),
+    ('⦈', '⦈'),
+    ('⦊', '⦊'),
+    ('⦌', '⦌'),
+    ('⦎', '⦎'),
+    ('⦐', '⦐'),
+    ('⦒', '⦒'),
+    ('⦔', '⦔'),
+    ('⦖', '⦖'),
+    ('⦘', '⦘'),
+    ('⧙', '⧙'),
+    ('⧛', '⧛'),
+    ('⧽', '⧽'),
+    ('⸣', '⸣'),
+    ('⸥', '⸥'),
+    ('⸧', '⸧'),
+    ('⸩', '⸩'),
+    ('〉', '〉'),
+    ('》', '》'),
+    ('」', '」'),
+    ('』', '』'),
+    ('】', '】'),
+    ('〕', '〕'),
+    ('〗', '〗'),
+    ('〙', '〙'),
+    ('〛', '〛'),
+    ('〞', '〟'),
+    ('﴾', '﴾'),
+    ('︘', '︘'),
+    ('︶', '︶'),
+    ('︸', '︸'),
+    ('︺', '︺'),
+    ('︼', '︼'),
+    ('︾', '︾'),
+    ('﹀', '﹀'),
+    ('﹂', '﹂'),
+    ('﹄', '﹄'),
+    ('﹈', '﹈'),
+    ('﹚', '﹚'),
+    ('﹜', '﹜'),
+    ('﹞', '﹞'),
+    (')', ')'),
+    (']', ']'),
+    ('}', '}'),
+    ('⦆', '⦆'),
+    ('」', '」'),
+];
+
+pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[
+    ('_', '_'),
+    ('‿', '⁀'),
+    ('⁔', '⁔'),
+    ('︳', '︴'),
+    ('﹍', '﹏'),
+    ('_', '_'),
+];
+
+pub const CONTROL: &'static [(char, char)] =
+    &[('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')];
+
+pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[
+    ('$', '$'),
+    ('¢', '¥'),
+    ('֏', '֏'),
+    ('؋', '؋'),
+    ('߾', '߿'),
+    ('৲', '৳'),
+    ('৻', '৻'),
+    ('૱', '૱'),
+    ('௹', '௹'),
+    ('฿', '฿'),
+    ('៛', '៛'),
+    ('₠', '₿'),
+    ('꠸', '꠸'),
+    ('﷼', '﷼'),
+    ('﹩', '﹩'),
+    ('$', '$'),
+    ('¢', '£'),
+    ('¥', '₩'),
+    ('𑿝', '𑿠'),
+    ('𞋿', '𞋿'),
+    ('𞲰', '𞲰'),
+];
+
+pub const DASH_PUNCTUATION: &'static [(char, char)] = &[
+    ('-', '-'),
+    ('֊', '֊'),
+    ('־', '־'),
+    ('᐀', '᐀'),
+    ('᠆', '᠆'),
+    ('‐', '―'),
+    ('⸗', '⸗'),
+    ('⸚', '⸚'),
+    ('⸺', '⸻'),
+    ('⹀', '⹀'),
+    ('〜', '〜'),
+    ('〰', '〰'),
+    ('゠', '゠'),
+    ('︱', '︲'),
+    ('﹘', '﹘'),
+    ('﹣', '﹣'),
+    ('-', '-'),
+    ('\u{10ead}', '\u{10ead}'),
+];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('٠', '٩'),
+    ('۰', '۹'),
+    ('߀', '߉'),
+    ('०', '९'),
+    ('০', '৯'),
+    ('੦', '੯'),
+    ('૦', '૯'),
+    ('୦', '୯'),
+    ('௦', '௯'),
+    ('౦', '౯'),
+    ('೦', '೯'),
+    ('൦', '൯'),
+    ('෦', '෯'),
+    ('๐', '๙'),
+    ('໐', '໙'),
+    ('༠', '༩'),
+    ('၀', '၉'),
+    ('႐', '႙'),
+    ('០', '៩'),
+    ('᠐', '᠙'),
+    ('᥆', '᥏'),
+    ('᧐', '᧙'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᭐', '᭙'),
+    ('᮰', '᮹'),
+    ('᱀', '᱉'),
+    ('᱐', '᱙'),
+    ('꘠', '꘩'),
+    ('꣐', '꣙'),
+    ('꤀', '꤉'),
+    ('꧐', '꧙'),
+    ('꧰', '꧹'),
+    ('꩐', '꩙'),
+    ('꯰', '꯹'),
+    ('0', '9'),
+    ('𐒠', '𐒩'),
+    ('𐴰', '𐴹'),
+    ('𑁦', '𑁯'),
+    ('𑃰', '𑃹'),
+    ('𑄶', '𑄿'),
+    ('𑇐', '𑇙'),
+    ('𑋰', '𑋹'),
+    ('𑑐', '𑑙'),
+    ('𑓐', '𑓙'),
+    ('𑙐', '𑙙'),
+    ('𑛀', '𑛉'),
+    ('𑜰', '𑜹'),
+    ('𑣠', '𑣩'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑱐', '𑱙'),
+    ('𑵐', '𑵙'),
+    ('𑶠', '𑶩'),
+    ('𖩠', '𖩩'),
+    ('𖭐', '𖭙'),
+    ('𝟎', '𝟿'),
+    ('𞅀', '𞅉'),
+    ('𞋰', '𞋹'),
+    ('𞥐', '𞥙'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const ENCLOSING_MARK: &'static [(char, char)] = &[
+    ('\u{488}', '\u{489}'),
+    ('\u{1abe}', '\u{1abe}'),
+    ('\u{20dd}', '\u{20e0}'),
+    ('\u{20e2}', '\u{20e4}'),
+    ('\u{a670}', '\u{a672}'),
+];
+
+pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[
+    ('»', '»'),
+    ('’', '’'),
+    ('”', '”'),
+    ('›', '›'),
+    ('⸃', '⸃'),
+    ('⸅', '⸅'),
+    ('⸊', '⸊'),
+    ('⸍', '⸍'),
+    ('⸝', '⸝'),
+    ('⸡', '⸡'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+    ('\u{ad}', '\u{ad}'),
+    ('\u{600}', '\u{605}'),
+    ('\u{61c}', '\u{61c}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{180e}', '\u{180e}'),
+    ('\u{200b}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{2064}'),
+    ('\u{2066}', '\u{206f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{fff9}', '\u{fffb}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('\u{13430}', '\u{13438}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[
+    ('«', '«'),
+    ('‘', '‘'),
+    ('‛', '“'),
+    ('‟', '‟'),
+    ('‹', '‹'),
+    ('⸂', '⸂'),
+    ('⸄', '⸄'),
+    ('⸉', '⸉'),
+    ('⸌', '⸌'),
+    ('⸜', '⸜'),
+    ('⸠', '⸠'),
+];
+
+pub const LETTER: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('Ͱ', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('ؠ', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۥ', 'ۦ'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॱ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ก', 'ะ'),
+    ('า', 'ำ'),
+    ('เ', 'ๆ'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'ຳ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('က', 'ဪ'),
+    ('ဿ', 'ဿ'),
+    ('ၐ', 'ၕ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၡ'),
+    ('ၥ', 'ၦ'),
+    ('ၮ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႎ', 'ႎ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛱ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢄ'),
+    ('ᢇ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᨠ', 'ᩔ'),
+    ('ᪧ', 'ᪧ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ↄ', 'ↄ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '〆'),
+    ('〱', '〵'),
+    ('〻', '〼'),
+    ('ぁ', 'ゖ'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('ꙿ', 'ꚝ'),
+    ('ꚠ', 'ꛥ'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧠ', 'ꧤ'),
+    ('ꧦ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꩺ'),
+    ('ꩾ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫴ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ヲ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍀'),
+    ('𐍂', '𐍉'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐐀', '𐒝'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('𑠀', '𑠫'),
+    ('𑢠', '𑣟'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞤀', '𞥃'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const LETTER_NUMBER: &'static [(char, char)] = &[
+    ('ᛮ', 'ᛰ'),
+    ('Ⅰ', 'ↂ'),
+    ('ↅ', 'ↈ'),
+    ('〇', '〇'),
+    ('〡', '〩'),
+    ('〸', '〺'),
+    ('ꛦ', 'ꛯ'),
+    ('𐅀', '𐅴'),
+    ('𐍁', '𐍁'),
+    ('𐍊', '𐍊'),
+    ('𐏑', '𐏕'),
+    ('𒐀', '𒑮'),
+];
+
+pub const LINE_SEPARATOR: &'static [(char, char)] =
+    &[('\u{2028}', '\u{2028}')];
+
+pub const LOWERCASE_LETTER: &'static [(char, char)] = &[
+    ('a', 'z'),
+    ('µ', 'µ'),
+    ('ß', 'ö'),
+    ('ø', 'ÿ'),
+    ('ā', 'ā'),
+    ('ă', 'ă'),
+    ('ą', 'ą'),
+    ('ć', 'ć'),
+    ('ĉ', 'ĉ'),
+    ('ċ', 'ċ'),
+    ('č', 'č'),
+    ('ď', 'ď'),
+    ('đ', 'đ'),
+    ('ē', 'ē'),
+    ('ĕ', 'ĕ'),
+    ('ė', 'ė'),
+    ('ę', 'ę'),
+    ('ě', 'ě'),
+    ('ĝ', 'ĝ'),
+    ('ğ', 'ğ'),
+    ('ġ', 'ġ'),
+    ('ģ', 'ģ'),
+    ('ĥ', 'ĥ'),
+    ('ħ', 'ħ'),
+    ('ĩ', 'ĩ'),
+    ('ī', 'ī'),
+    ('ĭ', 'ĭ'),
+    ('į', 'į'),
+    ('ı', 'ı'),
+    ('ij', 'ij'),
+    ('ĵ', 'ĵ'),
+    ('ķ', 'ĸ'),
+    ('ĺ', 'ĺ'),
+    ('ļ', 'ļ'),
+    ('ľ', 'ľ'),
+    ('ŀ', 'ŀ'),
+    ('ł', 'ł'),
+    ('ń', 'ń'),
+    ('ņ', 'ņ'),
+    ('ň', 'ʼn'),
+    ('ŋ', 'ŋ'),
+    ('ō', 'ō'),
+    ('ŏ', 'ŏ'),
+    ('ő', 'ő'),
+    ('œ', 'œ'),
+    ('ŕ', 'ŕ'),
+    ('ŗ', 'ŗ'),
+    ('ř', 'ř'),
+    ('ś', 'ś'),
+    ('ŝ', 'ŝ'),
+    ('ş', 'ş'),
+    ('š', 'š'),
+    ('ţ', 'ţ'),
+    ('ť', 'ť'),
+    ('ŧ', 'ŧ'),
+    ('ũ', 'ũ'),
+    ('ū', 'ū'),
+    ('ŭ', 'ŭ'),
+    ('ů', 'ů'),
+    ('ű', 'ű'),
+    ('ų', 'ų'),
+    ('ŵ', 'ŵ'),
+    ('ŷ', 'ŷ'),
+    ('ź', 'ź'),
+    ('ż', 'ż'),
+    ('ž', 'ƀ'),
+    ('ƃ', 'ƃ'),
+    ('ƅ', 'ƅ'),
+    ('ƈ', 'ƈ'),
+    ('ƌ', 'ƍ'),
+    ('ƒ', 'ƒ'),
+    ('ƕ', 'ƕ'),
+    ('ƙ', 'ƛ'),
+    ('ƞ', 'ƞ'),
+    ('ơ', 'ơ'),
+    ('ƣ', 'ƣ'),
+    ('ƥ', 'ƥ'),
+    ('ƨ', 'ƨ'),
+    ('ƪ', 'ƫ'),
+    ('ƭ', 'ƭ'),
+    ('ư', 'ư'),
+    ('ƴ', 'ƴ'),
+    ('ƶ', 'ƶ'),
+    ('ƹ', 'ƺ'),
+    ('ƽ', 'ƿ'),
+    ('dž', 'dž'),
+    ('lj', 'lj'),
+    ('nj', 'nj'),
+    ('ǎ', 'ǎ'),
+    ('ǐ', 'ǐ'),
+    ('ǒ', 'ǒ'),
+    ('ǔ', 'ǔ'),
+    ('ǖ', 'ǖ'),
+    ('ǘ', 'ǘ'),
+    ('ǚ', 'ǚ'),
+    ('ǜ', 'ǝ'),
+    ('ǟ', 'ǟ'),
+    ('ǡ', 'ǡ'),
+    ('ǣ', 'ǣ'),
+    ('ǥ', 'ǥ'),
+    ('ǧ', 'ǧ'),
+    ('ǩ', 'ǩ'),
+    ('ǫ', 'ǫ'),
+    ('ǭ', 'ǭ'),
+    ('ǯ', 'ǰ'),
+    ('dz', 'dz'),
+    ('ǵ', 'ǵ'),
+    ('ǹ', 'ǹ'),
+    ('ǻ', 'ǻ'),
+    ('ǽ', 'ǽ'),
+    ('ǿ', 'ǿ'),
+    ('ȁ', 'ȁ'),
+    ('ȃ', 'ȃ'),
+    ('ȅ', 'ȅ'),
+    ('ȇ', 'ȇ'),
+    ('ȉ', 'ȉ'),
+    ('ȋ', 'ȋ'),
+    ('ȍ', 'ȍ'),
+    ('ȏ', 'ȏ'),
+    ('ȑ', 'ȑ'),
+    ('ȓ', 'ȓ'),
+    ('ȕ', 'ȕ'),
+    ('ȗ', 'ȗ'),
+    ('ș', 'ș'),
+    ('ț', 'ț'),
+    ('ȝ', 'ȝ'),
+    ('ȟ', 'ȟ'),
+    ('ȡ', 'ȡ'),
+    ('ȣ', 'ȣ'),
+    ('ȥ', 'ȥ'),
+    ('ȧ', 'ȧ'),
+    ('ȩ', 'ȩ'),
+    ('ȫ', 'ȫ'),
+    ('ȭ', 'ȭ'),
+    ('ȯ', 'ȯ'),
+    ('ȱ', 'ȱ'),
+    ('ȳ', 'ȹ'),
+    ('ȼ', 'ȼ'),
+    ('ȿ', 'ɀ'),
+    ('ɂ', 'ɂ'),
+    ('ɇ', 'ɇ'),
+    ('ɉ', 'ɉ'),
+    ('ɋ', 'ɋ'),
+    ('ɍ', 'ɍ'),
+    ('ɏ', 'ʓ'),
+    ('ʕ', 'ʯ'),
+    ('ͱ', 'ͱ'),
+    ('ͳ', 'ͳ'),
+    ('ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('ΐ', 'ΐ'),
+    ('ά', 'ώ'),
+    ('ϐ', 'ϑ'),
+    ('ϕ', 'ϗ'),
+    ('ϙ', 'ϙ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('ϣ', 'ϣ'),
+    ('ϥ', 'ϥ'),
+    ('ϧ', 'ϧ'),
+    ('ϩ', 'ϩ'),
+    ('ϫ', 'ϫ'),
+    ('ϭ', 'ϭ'),
+    ('ϯ', 'ϳ'),
+    ('ϵ', 'ϵ'),
+    ('ϸ', 'ϸ'),
+    ('ϻ', 'ϼ'),
+    ('а', 'џ'),
+    ('ѡ', 'ѡ'),
+    ('ѣ', 'ѣ'),
+    ('ѥ', 'ѥ'),
+    ('ѧ', 'ѧ'),
+    ('ѩ', 'ѩ'),
+    ('ѫ', 'ѫ'),
+    ('ѭ', 'ѭ'),
+    ('ѯ', 'ѯ'),
+    ('ѱ', 'ѱ'),
+    ('ѳ', 'ѳ'),
+    ('ѵ', 'ѵ'),
+    ('ѷ', 'ѷ'),
+    ('ѹ', 'ѹ'),
+    ('ѻ', 'ѻ'),
+    ('ѽ', 'ѽ'),
+    ('ѿ', 'ѿ'),
+    ('ҁ', 'ҁ'),
+    ('ҋ', 'ҋ'),
+    ('ҍ', 'ҍ'),
+    ('ҏ', 'ҏ'),
+    ('ґ', 'ґ'),
+    ('ғ', 'ғ'),
+    ('ҕ', 'ҕ'),
+    ('җ', 'җ'),
+    ('ҙ', 'ҙ'),
+    ('қ', 'қ'),
+    ('ҝ', 'ҝ'),
+    ('ҟ', 'ҟ'),
+    ('ҡ', 'ҡ'),
+    ('ң', 'ң'),
+    ('ҥ', 'ҥ'),
+    ('ҧ', 'ҧ'),
+    ('ҩ', 'ҩ'),
+    ('ҫ', 'ҫ'),
+    ('ҭ', 'ҭ'),
+    ('ү', 'ү'),
+    ('ұ', 'ұ'),
+    ('ҳ', 'ҳ'),
+    ('ҵ', 'ҵ'),
+    ('ҷ', 'ҷ'),
+    ('ҹ', 'ҹ'),
+    ('һ', 'һ'),
+    ('ҽ', 'ҽ'),
+    ('ҿ', 'ҿ'),
+    ('ӂ', 'ӂ'),
+    ('ӄ', 'ӄ'),
+    ('ӆ', 'ӆ'),
+    ('ӈ', 'ӈ'),
+    ('ӊ', 'ӊ'),
+    ('ӌ', 'ӌ'),
+    ('ӎ', 'ӏ'),
+    ('ӑ', 'ӑ'),
+    ('ӓ', 'ӓ'),
+    ('ӕ', 'ӕ'),
+    ('ӗ', 'ӗ'),
+    ('ә', 'ә'),
+    ('ӛ', 'ӛ'),
+    ('ӝ', 'ӝ'),
+    ('ӟ', 'ӟ'),
+    ('ӡ', 'ӡ'),
+    ('ӣ', 'ӣ'),
+    ('ӥ', 'ӥ'),
+    ('ӧ', 'ӧ'),
+    ('ө', 'ө'),
+    ('ӫ', 'ӫ'),
+    ('ӭ', 'ӭ'),
+    ('ӯ', 'ӯ'),
+    ('ӱ', 'ӱ'),
+    ('ӳ', 'ӳ'),
+    ('ӵ', 'ӵ'),
+    ('ӷ', 'ӷ'),
+    ('ӹ', 'ӹ'),
+    ('ӻ', 'ӻ'),
+    ('ӽ', 'ӽ'),
+    ('ӿ', 'ӿ'),
+    ('ԁ', 'ԁ'),
+    ('ԃ', 'ԃ'),
+    ('ԅ', 'ԅ'),
+    ('ԇ', 'ԇ'),
+    ('ԉ', 'ԉ'),
+    ('ԋ', 'ԋ'),
+    ('ԍ', 'ԍ'),
+    ('ԏ', 'ԏ'),
+    ('ԑ', 'ԑ'),
+    ('ԓ', 'ԓ'),
+    ('ԕ', 'ԕ'),
+    ('ԗ', 'ԗ'),
+    ('ԙ', 'ԙ'),
+    ('ԛ', 'ԛ'),
+    ('ԝ', 'ԝ'),
+    ('ԟ', 'ԟ'),
+    ('ԡ', 'ԡ'),
+    ('ԣ', 'ԣ'),
+    ('ԥ', 'ԥ'),
+    ('ԧ', 'ԧ'),
+    ('ԩ', 'ԩ'),
+    ('ԫ', 'ԫ'),
+    ('ԭ', 'ԭ'),
+    ('ԯ', 'ԯ'),
+    ('ՠ', 'ֈ'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᴀ', 'ᴫ'),
+    ('ᵫ', 'ᵷ'),
+    ('ᵹ', 'ᶚ'),
+    ('ḁ', 'ḁ'),
+    ('ḃ', 'ḃ'),
+    ('ḅ', 'ḅ'),
+    ('ḇ', 'ḇ'),
+    ('ḉ', 'ḉ'),
+    ('ḋ', 'ḋ'),
+    ('ḍ', 'ḍ'),
+    ('ḏ', 'ḏ'),
+    ('ḑ', 'ḑ'),
+    ('ḓ', 'ḓ'),
+    ('ḕ', 'ḕ'),
+    ('ḗ', 'ḗ'),
+    ('ḙ', 'ḙ'),
+    ('ḛ', 'ḛ'),
+    ('ḝ', 'ḝ'),
+    ('ḟ', 'ḟ'),
+    ('ḡ', 'ḡ'),
+    ('ḣ', 'ḣ'),
+    ('ḥ', 'ḥ'),
+    ('ḧ', 'ḧ'),
+    ('ḩ', 'ḩ'),
+    ('ḫ', 'ḫ'),
+    ('ḭ', 'ḭ'),
+    ('ḯ', 'ḯ'),
+    ('ḱ', 'ḱ'),
+    ('ḳ', 'ḳ'),
+    ('ḵ', 'ḵ'),
+    ('ḷ', 'ḷ'),
+    ('ḹ', 'ḹ'),
+    ('ḻ', 'ḻ'),
+    ('ḽ', 'ḽ'),
+    ('ḿ', 'ḿ'),
+    ('ṁ', 'ṁ'),
+    ('ṃ', 'ṃ'),
+    ('ṅ', 'ṅ'),
+    ('ṇ', 'ṇ'),
+    ('ṉ', 'ṉ'),
+    ('ṋ', 'ṋ'),
+    ('ṍ', 'ṍ'),
+    ('ṏ', 'ṏ'),
+    ('ṑ', 'ṑ'),
+    ('ṓ', 'ṓ'),
+    ('ṕ', 'ṕ'),
+    ('ṗ', 'ṗ'),
+    ('ṙ', 'ṙ'),
+    ('ṛ', 'ṛ'),
+    ('ṝ', 'ṝ'),
+    ('ṟ', 'ṟ'),
+    ('ṡ', 'ṡ'),
+    ('ṣ', 'ṣ'),
+    ('ṥ', 'ṥ'),
+    ('ṧ', 'ṧ'),
+    ('ṩ', 'ṩ'),
+    ('ṫ', 'ṫ'),
+    ('ṭ', 'ṭ'),
+    ('ṯ', 'ṯ'),
+    ('ṱ', 'ṱ'),
+    ('ṳ', 'ṳ'),
+    ('ṵ', 'ṵ'),
+    ('ṷ', 'ṷ'),
+    ('ṹ', 'ṹ'),
+    ('ṻ', 'ṻ'),
+    ('ṽ', 'ṽ'),
+    ('ṿ', 'ṿ'),
+    ('ẁ', 'ẁ'),
+    ('ẃ', 'ẃ'),
+    ('ẅ', 'ẅ'),
+    ('ẇ', 'ẇ'),
+    ('ẉ', 'ẉ'),
+    ('ẋ', 'ẋ'),
+    ('ẍ', 'ẍ'),
+    ('ẏ', 'ẏ'),
+    ('ẑ', 'ẑ'),
+    ('ẓ', 'ẓ'),
+    ('ẕ', 'ẝ'),
+    ('ẟ', 'ẟ'),
+    ('ạ', 'ạ'),
+    ('ả', 'ả'),
+    ('ấ', 'ấ'),
+    ('ầ', 'ầ'),
+    ('ẩ', 'ẩ'),
+    ('ẫ', 'ẫ'),
+    ('ậ', 'ậ'),
+    ('ắ', 'ắ'),
+    ('ằ', 'ằ'),
+    ('ẳ', 'ẳ'),
+    ('ẵ', 'ẵ'),
+    ('ặ', 'ặ'),
+    ('ẹ', 'ẹ'),
+    ('ẻ', 'ẻ'),
+    ('ẽ', 'ẽ'),
+    ('ế', 'ế'),
+    ('ề', 'ề'),
+    ('ể', 'ể'),
+    ('ễ', 'ễ'),
+    ('ệ', 'ệ'),
+    ('ỉ', 'ỉ'),
+    ('ị', 'ị'),
+    ('ọ', 'ọ'),
+    ('ỏ', 'ỏ'),
+    ('ố', 'ố'),
+    ('ồ', 'ồ'),
+    ('ổ', 'ổ'),
+    ('ỗ', 'ỗ'),
+    ('ộ', 'ộ'),
+    ('ớ', 'ớ'),
+    ('ờ', 'ờ'),
+    ('ở', 'ở'),
+    ('ỡ', 'ỡ'),
+    ('ợ', 'ợ'),
+    ('ụ', 'ụ'),
+    ('ủ', 'ủ'),
+    ('ứ', 'ứ'),
+    ('ừ', 'ừ'),
+    ('ử', 'ử'),
+    ('ữ', 'ữ'),
+    ('ự', 'ự'),
+    ('ỳ', 'ỳ'),
+    ('ỵ', 'ỵ'),
+    ('ỷ', 'ỷ'),
+    ('ỹ', 'ỹ'),
+    ('ỻ', 'ỻ'),
+    ('ỽ', 'ỽ'),
+    ('ỿ', 'ἇ'),
+    ('ἐ', 'ἕ'),
+    ('ἠ', 'ἧ'),
+    ('ἰ', 'ἷ'),
+    ('ὀ', 'ὅ'),
+    ('ὐ', 'ὗ'),
+    ('ὠ', 'ὧ'),
+    ('ὰ', 'ώ'),
+    ('ᾀ', 'ᾇ'),
+    ('ᾐ', 'ᾗ'),
+    ('ᾠ', 'ᾧ'),
+    ('ᾰ', 'ᾴ'),
+    ('ᾶ', 'ᾷ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῇ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'ῗ'),
+    ('ῠ', 'ῧ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῷ'),
+    ('ℊ', 'ℊ'),
+    ('ℎ', 'ℏ'),
+    ('ℓ', 'ℓ'),
+    ('ℯ', 'ℯ'),
+    ('ℴ', 'ℴ'),
+    ('ℹ', 'ℹ'),
+    ('ℼ', 'ℽ'),
+    ('ⅆ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('ↄ', 'ↄ'),
+    ('ⰰ', 'ⱞ'),
+    ('ⱡ', 'ⱡ'),
+    ('ⱥ', 'ⱦ'),
+    ('ⱨ', 'ⱨ'),
+    ('ⱪ', 'ⱪ'),
+    ('ⱬ', 'ⱬ'),
+    ('ⱱ', 'ⱱ'),
+    ('ⱳ', 'ⱴ'),
+    ('ⱶ', 'ⱻ'),
+    ('ⲁ', 'ⲁ'),
+    ('ⲃ', 'ⲃ'),
+    ('ⲅ', 'ⲅ'),
+    ('ⲇ', 'ⲇ'),
+    ('ⲉ', 'ⲉ'),
+    ('ⲋ', 'ⲋ'),
+    ('ⲍ', 'ⲍ'),
+    ('ⲏ', 'ⲏ'),
+    ('ⲑ', 'ⲑ'),
+    ('ⲓ', 'ⲓ'),
+    ('ⲕ', 'ⲕ'),
+    ('ⲗ', 'ⲗ'),
+    ('ⲙ', 'ⲙ'),
+    ('ⲛ', 'ⲛ'),
+    ('ⲝ', 'ⲝ'),
+    ('ⲟ', 'ⲟ'),
+    ('ⲡ', 'ⲡ'),
+    ('ⲣ', 'ⲣ'),
+    ('ⲥ', 'ⲥ'),
+    ('ⲧ', 'ⲧ'),
+    ('ⲩ', 'ⲩ'),
+    ('ⲫ', 'ⲫ'),
+    ('ⲭ', 'ⲭ'),
+    ('ⲯ', 'ⲯ'),
+    ('ⲱ', 'ⲱ'),
+    ('ⲳ', 'ⲳ'),
+    ('ⲵ', 'ⲵ'),
+    ('ⲷ', 'ⲷ'),
+    ('ⲹ', 'ⲹ'),
+    ('ⲻ', 'ⲻ'),
+    ('ⲽ', 'ⲽ'),
+    ('ⲿ', 'ⲿ'),
+    ('ⳁ', 'ⳁ'),
+    ('ⳃ', 'ⳃ'),
+    ('ⳅ', 'ⳅ'),
+    ('ⳇ', 'ⳇ'),
+    ('ⳉ', 'ⳉ'),
+    ('ⳋ', 'ⳋ'),
+    ('ⳍ', 'ⳍ'),
+    ('ⳏ', 'ⳏ'),
+    ('ⳑ', 'ⳑ'),
+    ('ⳓ', 'ⳓ'),
+    ('ⳕ', 'ⳕ'),
+    ('ⳗ', 'ⳗ'),
+    ('ⳙ', 'ⳙ'),
+    ('ⳛ', 'ⳛ'),
+    ('ⳝ', 'ⳝ'),
+    ('ⳟ', 'ⳟ'),
+    ('ⳡ', 'ⳡ'),
+    ('ⳣ', 'ⳤ'),
+    ('ⳬ', 'ⳬ'),
+    ('ⳮ', 'ⳮ'),
+    ('ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ꙁ', 'ꙁ'),
+    ('ꙃ', 'ꙃ'),
+    ('ꙅ', 'ꙅ'),
+    ('ꙇ', 'ꙇ'),
+    ('ꙉ', 'ꙉ'),
+    ('ꙋ', 'ꙋ'),
+    ('ꙍ', 'ꙍ'),
+    ('ꙏ', 'ꙏ'),
+    ('ꙑ', 'ꙑ'),
+    ('ꙓ', 'ꙓ'),
+    ('ꙕ', 'ꙕ'),
+    ('ꙗ', 'ꙗ'),
+    ('ꙙ', 'ꙙ'),
+    ('ꙛ', 'ꙛ'),
+    ('ꙝ', 'ꙝ'),
+    ('ꙟ', 'ꙟ'),
+    ('ꙡ', 'ꙡ'),
+    ('ꙣ', 'ꙣ'),
+    ('ꙥ', 'ꙥ'),
+    ('ꙧ', 'ꙧ'),
+    ('ꙩ', 'ꙩ'),
+    ('ꙫ', 'ꙫ'),
+    ('ꙭ', 'ꙭ'),
+    ('ꚁ', 'ꚁ'),
+    ('ꚃ', 'ꚃ'),
+    ('ꚅ', 'ꚅ'),
+    ('ꚇ', 'ꚇ'),
+    ('ꚉ', 'ꚉ'),
+    ('ꚋ', 'ꚋ'),
+    ('ꚍ', 'ꚍ'),
+    ('ꚏ', 'ꚏ'),
+    ('ꚑ', 'ꚑ'),
+    ('ꚓ', 'ꚓ'),
+    ('ꚕ', 'ꚕ'),
+    ('ꚗ', 'ꚗ'),
+    ('ꚙ', 'ꚙ'),
+    ('ꚛ', 'ꚛ'),
+    ('ꜣ', 'ꜣ'),
+    ('ꜥ', 'ꜥ'),
+    ('ꜧ', 'ꜧ'),
+    ('ꜩ', 'ꜩ'),
+    ('ꜫ', 'ꜫ'),
+    ('ꜭ', 'ꜭ'),
+    ('ꜯ', 'ꜱ'),
+    ('ꜳ', 'ꜳ'),
+    ('ꜵ', 'ꜵ'),
+    ('ꜷ', 'ꜷ'),
+    ('ꜹ', 'ꜹ'),
+    ('ꜻ', 'ꜻ'),
+    ('ꜽ', 'ꜽ'),
+    ('ꜿ', 'ꜿ'),
+    ('ꝁ', 'ꝁ'),
+    ('ꝃ', 'ꝃ'),
+    ('ꝅ', 'ꝅ'),
+    ('ꝇ', 'ꝇ'),
+    ('ꝉ', 'ꝉ'),
+    ('ꝋ', 'ꝋ'),
+    ('ꝍ', 'ꝍ'),
+    ('ꝏ', 'ꝏ'),
+    ('ꝑ', 'ꝑ'),
+    ('ꝓ', 'ꝓ'),
+    ('ꝕ', 'ꝕ'),
+    ('ꝗ', 'ꝗ'),
+    ('ꝙ', 'ꝙ'),
+    ('ꝛ', 'ꝛ'),
+    ('ꝝ', 'ꝝ'),
+    ('ꝟ', 'ꝟ'),
+    ('ꝡ', 'ꝡ'),
+    ('ꝣ', 'ꝣ'),
+    ('ꝥ', 'ꝥ'),
+    ('ꝧ', 'ꝧ'),
+    ('ꝩ', 'ꝩ'),
+    ('ꝫ', 'ꝫ'),
+    ('ꝭ', 'ꝭ'),
+    ('ꝯ', 'ꝯ'),
+    ('ꝱ', 'ꝸ'),
+    ('ꝺ', 'ꝺ'),
+    ('ꝼ', 'ꝼ'),
+    ('ꝿ', 'ꝿ'),
+    ('ꞁ', 'ꞁ'),
+    ('ꞃ', 'ꞃ'),
+    ('ꞅ', 'ꞅ'),
+    ('ꞇ', 'ꞇ'),
+    ('ꞌ', 'ꞌ'),
+    ('ꞎ', 'ꞎ'),
+    ('ꞑ', 'ꞑ'),
+    ('ꞓ', 'ꞕ'),
+    ('ꞗ', 'ꞗ'),
+    ('ꞙ', 'ꞙ'),
+    ('ꞛ', 'ꞛ'),
+    ('ꞝ', 'ꞝ'),
+    ('ꞟ', 'ꞟ'),
+    ('ꞡ', 'ꞡ'),
+    ('ꞣ', 'ꞣ'),
+    ('ꞥ', 'ꞥ'),
+    ('ꞧ', 'ꞧ'),
+    ('ꞩ', 'ꞩ'),
+    ('ꞯ', 'ꞯ'),
+    ('ꞵ', 'ꞵ'),
+    ('ꞷ', 'ꞷ'),
+    ('ꞹ', 'ꞹ'),
+    ('ꞻ', 'ꞻ'),
+    ('ꞽ', 'ꞽ'),
+    ('ꞿ', 'ꞿ'),
+    ('ꟃ', 'ꟃ'),
+    ('\u{a7c8}', '\u{a7c8}'),
+    ('\u{a7ca}', '\u{a7ca}'),
+    ('\u{a7f6}', '\u{a7f6}'),
+    ('ꟺ', 'ꟺ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭠ', '\u{ab68}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('ａ', 'ｚ'),
+    ('𐐨', '𐑏'),
+    ('𐓘', '𐓻'),
+    ('𐳀', '𐳲'),
+    ('𑣀', '𑣟'),
+    ('𖹠', '𖹿'),
+    ('𝐚', '𝐳'),
+    ('𝑎', '𝑔'),
+    ('𝑖', '𝑧'),
+    ('𝒂', '𝒛'),
+    ('𝒶', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝓏'),
+    ('𝓪', '𝔃'),
+    ('𝔞', '𝔷'),
+    ('𝕒', '𝕫'),
+    ('𝖆', '𝖟'),
+    ('𝖺', '𝗓'),
+    ('𝗮', '𝘇'),
+    ('𝘢', '𝘻'),
+    ('𝙖', '𝙯'),
+    ('𝚊', '𝚥'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛡'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜛'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝕'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞏'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟉'),
+    ('𝟋', '𝟋'),
+    ('𞤢', '𞥃'),
+];
+
+pub const MARK: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{489}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', 'ः'),
+    ('\u{93a}', '\u{93c}'),
+    ('ा', 'ॏ'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', 'ঃ'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('\u{abc}', '\u{abc}'),
+    ('ા', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3e}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', '\u{c04}'),
+    ('\u{c3e}', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', 'ಃ'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('ಾ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', 'ഃ'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d3e}', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', 'ඃ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', '༿'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('ါ', '\u{103e}'),
+    ('ၖ', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('ၢ', 'ၤ'),
+    ('ၧ', 'ၭ'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{108d}'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('\u{1a17}', '\u{1a1b}'),
+    ('ᩕ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᬄ'),
+    ('\u{1b34}', '᭄'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', 'ᮂ'),
+    ('ᮡ', '\u{1bad}'),
+    ('\u{1be6}', '᯳'),
+    ('ᰤ', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('᳷', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('ꠣ', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '꥓'),
+    ('\u{a980}', 'ꦃ'),
+    ('\u{a9b3}', '꧀'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', 'ꩍ'),
+    ('ꩻ', 'ꩽ'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('ꫫ', 'ꫯ'),
+    ('ꫵ', '\u{aaf6}'),
+    ('ꯣ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('𑀀', '𑀂'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '𑂂'),
+    ('𑂰', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{11134}'),
+    ('𑅅', '𑅆'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '𑆂'),
+    ('𑆳', '𑇀'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('𑈬', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112ea}'),
+    ('\u{11300}', '𑌃'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{1133e}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍢', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐵', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b0}', '\u{114c3}'),
+    ('\u{115af}', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('𑘰', '\u{11640}'),
+    ('\u{116ab}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑠬', '\u{1183a}'),
+    ('\u{11930}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{1193e}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11943}'),
+    ('𑧑', '\u{119d7}'),
+    ('\u{119da}', '\u{119e0}'),
+    ('𑧤', '𑧤'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '𑨹'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a99}'),
+    ('𑰯', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('𑶊', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '\u{11d97}'),
+    ('\u{11ef3}', '𑻶'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('𖽑', '𖾇'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const MATH_SYMBOL: &'static [(char, char)] = &[
+    ('+', '+'),
+    ('<', '>'),
+    ('|', '|'),
+    ('~', '~'),
+    ('¬', '¬'),
+    ('±', '±'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('϶', '϶'),
+    ('؆', '؈'),
+    ('⁄', '⁄'),
+    ('⁒', '⁒'),
+    ('⁺', '⁼'),
+    ('₊', '₌'),
+    ('℘', '℘'),
+    ('⅀', '⅄'),
+    ('⅋', '⅋'),
+    ('←', '↔'),
+    ('↚', '↛'),
+    ('↠', '↠'),
+    ('↣', '↣'),
+    ('↦', '↦'),
+    ('↮', '↮'),
+    ('⇎', '⇏'),
+    ('⇒', '⇒'),
+    ('⇔', '⇔'),
+    ('⇴', '⋿'),
+    ('⌠', '⌡'),
+    ('⍼', '⍼'),
+    ('⎛', '⎳'),
+    ('⏜', '⏡'),
+    ('▷', '▷'),
+    ('◁', '◁'),
+    ('◸', '◿'),
+    ('♯', '♯'),
+    ('⟀', '⟄'),
+    ('⟇', '⟥'),
+    ('⟰', '⟿'),
+    ('⤀', '⦂'),
+    ('⦙', '⧗'),
+    ('⧜', '⧻'),
+    ('⧾', '⫿'),
+    ('⬰', '⭄'),
+    ('⭇', '⭌'),
+    ('﬩', '﬩'),
+    ('﹢', '﹢'),
+    ('﹤', '﹦'),
+    ('＋', '＋'),
+    ('＜', '＞'),
+    ('｜', '｜'),
+    ('～', '～'),
+    ('￢', '￢'),
+    ('￩', '￬'),
+    ('𝛁', '𝛁'),
+    ('𝛛', '𝛛'),
+    ('𝛻', '𝛻'),
+    ('𝜕', '𝜕'),
+    ('𝜵', '𝜵'),
+    ('𝝏', '𝝏'),
+    ('𝝯', '𝝯'),
+    ('𝞉', '𝞉'),
+    ('𝞩', '𝞩'),
+    ('𝟃', '𝟃'),
+    ('𞻰', '𞻱'),
+];
+
+pub const MODIFIER_LETTER: &'static [(char, char)] = &[
+    ('ʰ', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('ʹ', 'ʹ'),
+    ('ͺ', 'ͺ'),
+    ('ՙ', 'ՙ'),
+    ('ـ', 'ـ'),
+    ('ۥ', 'ۦ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ॱ', 'ॱ'),
+    ('ๆ', 'ๆ'),
+    ('ໆ', 'ໆ'),
+    ('ჼ', 'ჼ'),
+    ('ៗ', 'ៗ'),
+    ('ᡃ', 'ᡃ'),
+    ('ᪧ', 'ᪧ'),
+    ('ᱸ', 'ᱽ'),
+    ('ᴬ', 'ᵪ'),
+    ('ᵸ', 'ᵸ'),
+    ('ᶛ', 'ᶿ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ⱼ', 'ⱽ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '々'),
+    ('〱', '〵'),
+    ('〻', '〻'),
+    ('ゝ', 'ゞ'),
+    ('ー', 'ヾ'),
+    ('ꀕ', 'ꀕ'),
+    ('ꓸ', 'ꓽ'),
+    ('ꘌ', 'ꘌ'),
+    ('ꙿ', 'ꙿ'),
+    ('ꚜ', 'ꚝ'),
+    ('ꜗ', 'ꜟ'),
+    ('ꝰ', 'ꝰ'),
+    ('ꞈ', 'ꞈ'),
+    ('ꟸ', 'ꟹ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧦ', 'ꧦ'),
+    ('ꩰ', 'ꩰ'),
+    ('ꫝ', 'ꫝ'),
+    ('ꫳ', 'ꫴ'),
+    ('ꭜ', 'ꭟ'),
+    ('\u{ab69}', '\u{ab69}'),
+    ('ｰ', 'ｰ'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('𖭀', '𖭃'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𞄷', '𞄽'),
+    ('𞥋', '𞥋'),
+];
+
+pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[
+    ('^', '^'),
+    ('`', '`'),
+    ('¨', '¨'),
+    ('¯', '¯'),
+    ('´', '´'),
+    ('¸', '¸'),
+    ('˂', '˅'),
+    ('˒', '˟'),
+    ('˥', '˫'),
+    ('˭', '˭'),
+    ('˯', '˿'),
+    ('͵', '͵'),
+    ('΄', '΅'),
+    ('᾽', '᾽'),
+    ('᾿', '῁'),
+    ('῍', '῏'),
+    ('῝', '῟'),
+    ('῭', '`'),
+    ('´', '῾'),
+    ('゛', '゜'),
+    ('꜀', '꜖'),
+    ('꜠', '꜡'),
+    ('꞉', '꞊'),
+    ('꭛', '꭛'),
+    ('\u{ab6a}', '\u{ab6b}'),
+    ('﮲', '﯁'),
+    ('＾', '＾'),
+    ('｀', '｀'),
+    ('￣', '￣'),
+    ('🏻', '🏿'),
+];
+
+pub const NONSPACING_MARK: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{487}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{902}'),
+    ('\u{93a}', '\u{93a}'),
+    ('\u{93c}', '\u{93c}'),
+    ('\u{941}', '\u{948}'),
+    ('\u{94d}', '\u{94d}'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', '\u{981}'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9c1}', '\u{9c4}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', '\u{a02}'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('\u{a41}', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', '\u{a82}'),
+    ('\u{abc}', '\u{abc}'),
+    ('\u{ac1}', '\u{ac5}'),
+    ('\u{ac7}', '\u{ac8}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', '\u{b01}'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3f}', '\u{b3f}'),
+    ('\u{b41}', '\u{b44}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{b55}', '\u{b56}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bc0}', '\u{bc0}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{c00}', '\u{c00}'),
+    ('\u{c04}', '\u{c04}'),
+    ('\u{c3e}', '\u{c40}'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', '\u{c81}'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('\u{cbf}', '\u{cbf}'),
+    ('\u{cc6}', '\u{cc6}'),
+    ('\u{ccc}', '\u{ccd}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', '\u{d01}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d41}', '\u{d44}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', '\u{d81}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dd2}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('\u{f71}', '\u{f7e}'),
+    ('\u{f80}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('\u{102d}', '\u{1030}'),
+    ('\u{1032}', '\u{1037}'),
+    ('\u{1039}', '\u{103a}'),
+    ('\u{103d}', '\u{103e}'),
+    ('\u{1058}', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{1082}'),
+    ('\u{1085}', '\u{1086}'),
+    ('\u{108d}', '\u{108d}'),
+    ('\u{109d}', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{17b7}', '\u{17bd}'),
+    ('\u{17c6}', '\u{17c6}'),
+    ('\u{17c9}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', '\u{1922}'),
+    ('\u{1927}', '\u{1928}'),
+    ('\u{1932}', '\u{1932}'),
+    ('\u{1939}', '\u{193b}'),
+    ('\u{1a17}', '\u{1a18}'),
+    ('\u{1a1b}', '\u{1a1b}'),
+    ('\u{1a56}', '\u{1a56}'),
+    ('\u{1a58}', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a60}'),
+    ('\u{1a62}', '\u{1a62}'),
+    ('\u{1a65}', '\u{1a6c}'),
+    ('\u{1a73}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1abd}'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{1b00}', '\u{1b03}'),
+    ('\u{1b34}', '\u{1b34}'),
+    ('\u{1b36}', '\u{1b3a}'),
+    ('\u{1b3c}', '\u{1b3c}'),
+    ('\u{1b42}', '\u{1b42}'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '\u{1b81}'),
+    ('\u{1ba2}', '\u{1ba5}'),
+    ('\u{1ba8}', '\u{1ba9}'),
+    ('\u{1bab}', '\u{1bad}'),
+    ('\u{1be6}', '\u{1be6}'),
+    ('\u{1be8}', '\u{1be9}'),
+    ('\u{1bed}', '\u{1bed}'),
+    ('\u{1bef}', '\u{1bf1}'),
+    ('\u{1c2c}', '\u{1c33}'),
+    ('\u{1c36}', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce0}'),
+    ('\u{1ce2}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{20d0}', '\u{20dc}'),
+    ('\u{20e1}', '\u{20e1}'),
+    ('\u{20e5}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302d}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a66f}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('\u{a825}', '\u{a826}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{a8c4}', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '\u{a951}'),
+    ('\u{a980}', '\u{a982}'),
+    ('\u{a9b3}', '\u{a9b3}'),
+    ('\u{a9b6}', '\u{a9b9}'),
+    ('\u{a9bc}', '\u{a9bd}'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa2e}'),
+    ('\u{aa31}', '\u{aa32}'),
+    ('\u{aa35}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', '\u{aa4c}'),
+    ('\u{aa7c}', '\u{aa7c}'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('\u{aaec}', '\u{aaed}'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('\u{abe5}', '\u{abe5}'),
+    ('\u{abe8}', '\u{abe8}'),
+    ('\u{abed}', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('\u{11001}', '\u{11001}'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '\u{11081}'),
+    ('\u{110b3}', '\u{110b6}'),
+    ('\u{110b9}', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{1112b}'),
+    ('\u{1112d}', '\u{11134}'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '\u{11181}'),
+    ('\u{111b6}', '\u{111be}'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111cf}', '\u{111cf}'),
+    ('\u{1122f}', '\u{11231}'),
+    ('\u{11234}', '\u{11234}'),
+    ('\u{11236}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112df}'),
+    ('\u{112e3}', '\u{112ea}'),
+    ('\u{11300}', '\u{11301}'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{11340}', '\u{11340}'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('\u{11438}', '\u{1143f}'),
+    ('\u{11442}', '\u{11444}'),
+    ('\u{11446}', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b3}', '\u{114b8}'),
+    ('\u{114ba}', '\u{114ba}'),
+    ('\u{114bf}', '\u{114c0}'),
+    ('\u{114c2}', '\u{114c3}'),
+    ('\u{115b2}', '\u{115b5}'),
+    ('\u{115bc}', '\u{115bd}'),
+    ('\u{115bf}', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('\u{11633}', '\u{1163a}'),
+    ('\u{1163d}', '\u{1163d}'),
+    ('\u{1163f}', '\u{11640}'),
+    ('\u{116ab}', '\u{116ab}'),
+    ('\u{116ad}', '\u{116ad}'),
+    ('\u{116b0}', '\u{116b5}'),
+    ('\u{116b7}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1171f}'),
+    ('\u{11722}', '\u{11725}'),
+    ('\u{11727}', '\u{1172b}'),
+    ('\u{1182f}', '\u{11837}'),
+    ('\u{11839}', '\u{1183a}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{1193e}', '\u{1193e}'),
+    ('\u{11943}', '\u{11943}'),
+    ('\u{119d4}', '\u{119d7}'),
+    ('\u{119da}', '\u{119db}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '\u{11a38}'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a56}'),
+    ('\u{11a59}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a96}'),
+    ('\u{11a98}', '\u{11a99}'),
+    ('\u{11c30}', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3d}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('\u{11caa}', '\u{11cb0}'),
+    ('\u{11cb2}', '\u{11cb3}'),
+    ('\u{11cb5}', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('\u{11d95}', '\u{11d95}'),
+    ('\u{11d97}', '\u{11d97}'),
+    ('\u{11ef3}', '\u{11ef4}'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const NUMBER: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('²', '³'),
+    ('¹', '¹'),
+    ('¼', '¾'),
+    ('٠', '٩'),
+    ('۰', '۹'),
+    ('߀', '߉'),
+    ('०', '९'),
+    ('০', '৯'),
+    ('৴', '৹'),
+    ('੦', '੯'),
+    ('૦', '૯'),
+    ('୦', '୯'),
+    ('୲', '୷'),
+    ('௦', '௲'),
+    ('౦', '౯'),
+    ('౸', '౾'),
+    ('೦', '೯'),
+    ('൘', '൞'),
+    ('൦', '൸'),
+    ('෦', '෯'),
+    ('๐', '๙'),
+    ('໐', '໙'),
+    ('༠', '༳'),
+    ('၀', '၉'),
+    ('႐', '႙'),
+    ('፩', '፼'),
+    ('ᛮ', 'ᛰ'),
+    ('០', '៩'),
+    ('៰', '៹'),
+    ('᠐', '᠙'),
+    ('᥆', '᥏'),
+    ('᧐', '᧚'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᭐', '᭙'),
+    ('᮰', '᮹'),
+    ('᱀', '᱉'),
+    ('᱐', '᱙'),
+    ('⁰', '⁰'),
+    ('⁴', '⁹'),
+    ('₀', '₉'),
+    ('⅐', 'ↂ'),
+    ('ↅ', '↉'),
+    ('①', '⒛'),
+    ('⓪', '⓿'),
+    ('❶', '➓'),
+    ('⳽', '⳽'),
+    ('〇', '〇'),
+    ('〡', '〩'),
+    ('〸', '〺'),
+    ('㆒', '㆕'),
+    ('㈠', '㈩'),
+    ('㉈', '㉏'),
+    ('㉑', '㉟'),
+    ('㊀', '㊉'),
+    ('㊱', '㊿'),
+    ('꘠', '꘩'),
+    ('ꛦ', 'ꛯ'),
+    ('꠰', '꠵'),
+    ('꣐', '꣙'),
+    ('꤀', '꤉'),
+    ('꧐', '꧙'),
+    ('꧰', '꧹'),
+    ('꩐', '꩙'),
+    ('꯰', '꯹'),
+    ('０', '９'),
+    ('𐄇', '𐄳'),
+    ('𐅀', '𐅸'),
+    ('𐆊', '𐆋'),
+    ('𐋡', '𐋻'),
+    ('𐌠', '𐌣'),
+    ('𐍁', '𐍁'),
+    ('𐍊', '𐍊'),
+    ('𐏑', '𐏕'),
+    ('𐒠', '𐒩'),
+    ('𐡘', '𐡟'),
+    ('𐡹', '𐡿'),
+    ('𐢧', '𐢯'),
+    ('𐣻', '𐣿'),
+    ('𐤖', '𐤛'),
+    ('𐦼', '𐦽'),
+    ('𐧀', '𐧏'),
+    ('𐧒', '𐧿'),
+    ('𐩀', '𐩈'),
+    ('𐩽', '𐩾'),
+    ('𐪝', '𐪟'),
+    ('𐫫', '𐫯'),
+    ('𐭘', '𐭟'),
+    ('𐭸', '𐭿'),
+    ('𐮩', '𐮯'),
+    ('𐳺', '𐳿'),
+    ('𐴰', '𐴹'),
+    ('𐹠', '𐹾'),
+    ('𐼝', '𐼦'),
+    ('𐽑', '𐽔'),
+    ('\u{10fc5}', '\u{10fcb}'),
+    ('𑁒', '𑁯'),
+    ('𑃰', '𑃹'),
+    ('𑄶', '𑄿'),
+    ('𑇐', '𑇙'),
+    ('𑇡', '𑇴'),
+    ('𑋰', '𑋹'),
+    ('𑑐', '𑑙'),
+    ('𑓐', '𑓙'),
+    ('𑙐', '𑙙'),
+    ('𑛀', '𑛉'),
+    ('𑜰', '𑜻'),
+    ('𑣠', '𑣲'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑱐', '𑱬'),
+    ('𑵐', '𑵙'),
+    ('𑶠', '𑶩'),
+    ('𑿀', '𑿔'),
+    ('𒐀', '𒑮'),
+    ('𖩠', '𖩩'),
+    ('𖭐', '𖭙'),
+    ('𖭛', '𖭡'),
+    ('𖺀', '𖺖'),
+    ('𝋠', '𝋳'),
+    ('𝍠', '𝍸'),
+    ('𝟎', '𝟿'),
+    ('𞅀', '𞅉'),
+    ('𞋰', '𞋹'),
+    ('𞣇', '𞣏'),
+    ('𞥐', '𞥙'),
+    ('𞱱', '𞲫'),
+    ('𞲭', '𞲯'),
+    ('𞲱', '𞲴'),
+    ('𞴁', '𞴭'),
+    ('𞴯', '𞴽'),
+    ('🄀', '🄌'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[
+    ('(', '('),
+    ('[', '['),
+    ('{', '{'),
+    ('༺', '༺'),
+    ('༼', '༼'),
+    ('᚛', '᚛'),
+    ('‚', '‚'),
+    ('„', '„'),
+    ('⁅', '⁅'),
+    ('⁽', '⁽'),
+    ('₍', '₍'),
+    ('⌈', '⌈'),
+    ('⌊', '⌊'),
+    ('⟨', '⟨'),
+    ('❨', '❨'),
+    ('❪', '❪'),
+    ('❬', '❬'),
+    ('❮', '❮'),
+    ('❰', '❰'),
+    ('❲', '❲'),
+    ('❴', '❴'),
+    ('⟅', '⟅'),
+    ('⟦', '⟦'),
+    ('⟨', '⟨'),
+    ('⟪', '⟪'),
+    ('⟬', '⟬'),
+    ('⟮', '⟮'),
+    ('⦃', '⦃'),
+    ('⦅', '⦅'),
+    ('⦇', '⦇'),
+    ('⦉', '⦉'),
+    ('⦋', '⦋'),
+    ('⦍', '⦍'),
+    ('⦏', '⦏'),
+    ('⦑', '⦑'),
+    ('⦓', '⦓'),
+    ('⦕', '⦕'),
+    ('⦗', '⦗'),
+    ('⧘', '⧘'),
+    ('⧚', '⧚'),
+    ('⧼', '⧼'),
+    ('⸢', '⸢'),
+    ('⸤', '⸤'),
+    ('⸦', '⸦'),
+    ('⸨', '⸨'),
+    ('⹂', '⹂'),
+    ('〈', '〈'),
+    ('《', '《'),
+    ('「', '「'),
+    ('『', '『'),
+    ('【', '【'),
+    ('〔', '〔'),
+    ('〖', '〖'),
+    ('〘', '〘'),
+    ('〚', '〚'),
+    ('〝', '〝'),
+    ('﴿', '﴿'),
+    ('︗', '︗'),
+    ('︵', '︵'),
+    ('︷', '︷'),
+    ('︹', '︹'),
+    ('︻', '︻'),
+    ('︽', '︽'),
+    ('︿', '︿'),
+    ('﹁', '﹁'),
+    ('﹃', '﹃'),
+    ('﹇', '﹇'),
+    ('﹙', '﹙'),
+    ('﹛', '﹛'),
+    ('﹝', '﹝'),
+    ('（', '（'),
+    ('［', '［'),
+    ('｛', '｛'),
+    ('｟', '｟'),
+    ('｢', '｢'),
+];
+
+pub const OTHER: &'static [(char, char)] = &[
+    ('\u{0}', '\u{1f}'),
+    ('\u{7f}', '\u{9f}'),
+    ('\u{ad}', '\u{ad}'),
+    ('\u{378}', '\u{379}'),
+    ('\u{380}', '\u{383}'),
+    ('\u{38b}', '\u{38b}'),
+    ('\u{38d}', '\u{38d}'),
+    ('\u{3a2}', '\u{3a2}'),
+    ('\u{530}', '\u{530}'),
+    ('\u{557}', '\u{558}'),
+    ('\u{58b}', '\u{58c}'),
+    ('\u{590}', '\u{590}'),
+    ('\u{5c8}', '\u{5cf}'),
+    ('\u{5eb}', '\u{5ee}'),
+    ('\u{5f5}', '\u{605}'),
+    ('\u{61c}', '\u{61d}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70e}', '\u{70f}'),
+    ('\u{74b}', '\u{74c}'),
+    ('\u{7b2}', '\u{7bf}'),
+    ('\u{7fb}', '\u{7fc}'),
+    ('\u{82e}', '\u{82f}'),
+    ('\u{83f}', '\u{83f}'),
+    ('\u{85c}', '\u{85d}'),
+    ('\u{85f}', '\u{85f}'),
+    ('\u{86b}', '\u{89f}'),
+    ('\u{8b5}', '\u{8b5}'),
+    ('\u{8c8}', '\u{8d2}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{984}', '\u{984}'),
+    ('\u{98d}', '\u{98e}'),
+    ('\u{991}', '\u{992}'),
+    ('\u{9a9}', '\u{9a9}'),
+    ('\u{9b1}', '\u{9b1}'),
+    ('\u{9b3}', '\u{9b5}'),
+    ('\u{9ba}', '\u{9bb}'),
+    ('\u{9c5}', '\u{9c6}'),
+    ('\u{9c9}', '\u{9ca}'),
+    ('\u{9cf}', '\u{9d6}'),
+    ('\u{9d8}', '\u{9db}'),
+    ('\u{9de}', '\u{9de}'),
+    ('\u{9e4}', '\u{9e5}'),
+    ('\u{9ff}', '\u{a00}'),
+    ('\u{a04}', '\u{a04}'),
+    ('\u{a0b}', '\u{a0e}'),
+    ('\u{a11}', '\u{a12}'),
+    ('\u{a29}', '\u{a29}'),
+    ('\u{a31}', '\u{a31}'),
+    ('\u{a34}', '\u{a34}'),
+    ('\u{a37}', '\u{a37}'),
+    ('\u{a3a}', '\u{a3b}'),
+    ('\u{a3d}', '\u{a3d}'),
+    ('\u{a43}', '\u{a46}'),
+    ('\u{a49}', '\u{a4a}'),
+    ('\u{a4e}', '\u{a50}'),
+    ('\u{a52}', '\u{a58}'),
+    ('\u{a5d}', '\u{a5d}'),
+    ('\u{a5f}', '\u{a65}'),
+    ('\u{a77}', '\u{a80}'),
+    ('\u{a84}', '\u{a84}'),
+    ('\u{a8e}', '\u{a8e}'),
+    ('\u{a92}', '\u{a92}'),
+    ('\u{aa9}', '\u{aa9}'),
+    ('\u{ab1}', '\u{ab1}'),
+    ('\u{ab4}', '\u{ab4}'),
+    ('\u{aba}', '\u{abb}'),
+    ('\u{ac6}', '\u{ac6}'),
+    ('\u{aca}', '\u{aca}'),
+    ('\u{ace}', '\u{acf}'),
+    ('\u{ad1}', '\u{adf}'),
+    ('\u{ae4}', '\u{ae5}'),
+    ('\u{af2}', '\u{af8}'),
+    ('\u{b00}', '\u{b00}'),
+    ('\u{b04}', '\u{b04}'),
+    ('\u{b0d}', '\u{b0e}'),
+    ('\u{b11}', '\u{b12}'),
+    ('\u{b29}', '\u{b29}'),
+    ('\u{b31}', '\u{b31}'),
+    ('\u{b34}', '\u{b34}'),
+    ('\u{b3a}', '\u{b3b}'),
+    ('\u{b45}', '\u{b46}'),
+    ('\u{b49}', '\u{b4a}'),
+    ('\u{b4e}', '\u{b54}'),
+    ('\u{b58}', '\u{b5b}'),
+    ('\u{b5e}', '\u{b5e}'),
+    ('\u{b64}', '\u{b65}'),
+    ('\u{b78}', '\u{b81}'),
+    ('\u{b84}', '\u{b84}'),
+    ('\u{b8b}', '\u{b8d}'),
+    ('\u{b91}', '\u{b91}'),
+    ('\u{b96}', '\u{b98}'),
+    ('\u{b9b}', '\u{b9b}'),
+    ('\u{b9d}', '\u{b9d}'),
+    ('\u{ba0}', '\u{ba2}'),
+    ('\u{ba5}', '\u{ba7}'),
+    ('\u{bab}', '\u{bad}'),
+    ('\u{bba}', '\u{bbd}'),
+    ('\u{bc3}', '\u{bc5}'),
+    ('\u{bc9}', '\u{bc9}'),
+    ('\u{bce}', '\u{bcf}'),
+    ('\u{bd1}', '\u{bd6}'),
+    ('\u{bd8}', '\u{be5}'),
+    ('\u{bfb}', '\u{bff}'),
+    ('\u{c0d}', '\u{c0d}'),
+    ('\u{c11}', '\u{c11}'),
+    ('\u{c29}', '\u{c29}'),
+    ('\u{c3a}', '\u{c3c}'),
+    ('\u{c45}', '\u{c45}'),
+    ('\u{c49}', '\u{c49}'),
+    ('\u{c4e}', '\u{c54}'),
+    ('\u{c57}', '\u{c57}'),
+    ('\u{c5b}', '\u{c5f}'),
+    ('\u{c64}', '\u{c65}'),
+    ('\u{c70}', '\u{c76}'),
+    ('\u{c8d}', '\u{c8d}'),
+    ('\u{c91}', '\u{c91}'),
+    ('\u{ca9}', '\u{ca9}'),
+    ('\u{cb4}', '\u{cb4}'),
+    ('\u{cba}', '\u{cbb}'),
+    ('\u{cc5}', '\u{cc5}'),
+    ('\u{cc9}', '\u{cc9}'),
+    ('\u{cce}', '\u{cd4}'),
+    ('\u{cd7}', '\u{cdd}'),
+    ('\u{cdf}', '\u{cdf}'),
+    ('\u{ce4}', '\u{ce5}'),
+    ('\u{cf0}', '\u{cf0}'),
+    ('\u{cf3}', '\u{cff}'),
+    ('\u{d0d}', '\u{d0d}'),
+    ('\u{d11}', '\u{d11}'),
+    ('\u{d45}', '\u{d45}'),
+    ('\u{d49}', '\u{d49}'),
+    ('\u{d50}', '\u{d53}'),
+    ('\u{d64}', '\u{d65}'),
+    ('\u{d80}', '\u{d80}'),
+    ('\u{d84}', '\u{d84}'),
+    ('\u{d97}', '\u{d99}'),
+    ('\u{db2}', '\u{db2}'),
+    ('\u{dbc}', '\u{dbc}'),
+    ('\u{dbe}', '\u{dbf}'),
+    ('\u{dc7}', '\u{dc9}'),
+    ('\u{dcb}', '\u{dce}'),
+    ('\u{dd5}', '\u{dd5}'),
+    ('\u{dd7}', '\u{dd7}'),
+    ('\u{de0}', '\u{de5}'),
+    ('\u{df0}', '\u{df1}'),
+    ('\u{df5}', '\u{e00}'),
+    ('\u{e3b}', '\u{e3e}'),
+    ('\u{e5c}', '\u{e80}'),
+    ('\u{e83}', '\u{e83}'),
+    ('\u{e85}', '\u{e85}'),
+    ('\u{e8b}', '\u{e8b}'),
+    ('\u{ea4}', '\u{ea4}'),
+    ('\u{ea6}', '\u{ea6}'),
+    ('\u{ebe}', '\u{ebf}'),
+    ('\u{ec5}', '\u{ec5}'),
+    ('\u{ec7}', '\u{ec7}'),
+    ('\u{ece}', '\u{ecf}'),
+    ('\u{eda}', '\u{edb}'),
+    ('\u{ee0}', '\u{eff}'),
+    ('\u{f48}', '\u{f48}'),
+    ('\u{f6d}', '\u{f70}'),
+    ('\u{f98}', '\u{f98}'),
+    ('\u{fbd}', '\u{fbd}'),
+    ('\u{fcd}', '\u{fcd}'),
+    ('\u{fdb}', '\u{fff}'),
+    ('\u{10c6}', '\u{10c6}'),
+    ('\u{10c8}', '\u{10cc}'),
+    ('\u{10ce}', '\u{10cf}'),
+    ('\u{1249}', '\u{1249}'),
+    ('\u{124e}', '\u{124f}'),
+    ('\u{1257}', '\u{1257}'),
+    ('\u{1259}', '\u{1259}'),
+    ('\u{125e}', '\u{125f}'),
+    ('\u{1289}', '\u{1289}'),
+    ('\u{128e}', '\u{128f}'),
+    ('\u{12b1}', '\u{12b1}'),
+    ('\u{12b6}', '\u{12b7}'),
+    ('\u{12bf}', '\u{12bf}'),
+    ('\u{12c1}', '\u{12c1}'),
+    ('\u{12c6}', '\u{12c7}'),
+    ('\u{12d7}', '\u{12d7}'),
+    ('\u{1311}', '\u{1311}'),
+    ('\u{1316}', '\u{1317}'),
+    ('\u{135b}', '\u{135c}'),
+    ('\u{137d}', '\u{137f}'),
+    ('\u{139a}', '\u{139f}'),
+    ('\u{13f6}', '\u{13f7}'),
+    ('\u{13fe}', '\u{13ff}'),
+    ('\u{169d}', '\u{169f}'),
+    ('\u{16f9}', '\u{16ff}'),
+    ('\u{170d}', '\u{170d}'),
+    ('\u{1715}', '\u{171f}'),
+    ('\u{1737}', '\u{173f}'),
+    ('\u{1754}', '\u{175f}'),
+    ('\u{176d}', '\u{176d}'),
+    ('\u{1771}', '\u{1771}'),
+    ('\u{1774}', '\u{177f}'),
+    ('\u{17de}', '\u{17df}'),
+    ('\u{17ea}', '\u{17ef}'),
+    ('\u{17fa}', '\u{17ff}'),
+    ('\u{180e}', '\u{180f}'),
+    ('\u{181a}', '\u{181f}'),
+    ('\u{1879}', '\u{187f}'),
+    ('\u{18ab}', '\u{18af}'),
+    ('\u{18f6}', '\u{18ff}'),
+    ('\u{191f}', '\u{191f}'),
+    ('\u{192c}', '\u{192f}'),
+    ('\u{193c}', '\u{193f}'),
+    ('\u{1941}', '\u{1943}'),
+    ('\u{196e}', '\u{196f}'),
+    ('\u{1975}', '\u{197f}'),
+    ('\u{19ac}', '\u{19af}'),
+    ('\u{19ca}', '\u{19cf}'),
+    ('\u{19db}', '\u{19dd}'),
+    ('\u{1a1c}', '\u{1a1d}'),
+    ('\u{1a5f}', '\u{1a5f}'),
+    ('\u{1a7d}', '\u{1a7e}'),
+    ('\u{1a8a}', '\u{1a8f}'),
+    ('\u{1a9a}', '\u{1a9f}'),
+    ('\u{1aae}', '\u{1aaf}'),
+    ('\u{1ac1}', '\u{1aff}'),
+    ('\u{1b4c}', '\u{1b4f}'),
+    ('\u{1b7d}', '\u{1b7f}'),
+    ('\u{1bf4}', '\u{1bfb}'),
+    ('\u{1c38}', '\u{1c3a}'),
+    ('\u{1c4a}', '\u{1c4c}'),
+    ('\u{1c89}', '\u{1c8f}'),
+    ('\u{1cbb}', '\u{1cbc}'),
+    ('\u{1cc8}', '\u{1ccf}'),
+    ('\u{1cfb}', '\u{1cff}'),
+    ('\u{1dfa}', '\u{1dfa}'),
+    ('\u{1f16}', '\u{1f17}'),
+    ('\u{1f1e}', '\u{1f1f}'),
+    ('\u{1f46}', '\u{1f47}'),
+    ('\u{1f4e}', '\u{1f4f}'),
+    ('\u{1f58}', '\u{1f58}'),
+    ('\u{1f5a}', '\u{1f5a}'),
+    ('\u{1f5c}', '\u{1f5c}'),
+    ('\u{1f5e}', '\u{1f5e}'),
+    ('\u{1f7e}', '\u{1f7f}'),
+    ('\u{1fb5}', '\u{1fb5}'),
+    ('\u{1fc5}', '\u{1fc5}'),
+    ('\u{1fd4}', '\u{1fd5}'),
+    ('\u{1fdc}', '\u{1fdc}'),
+    ('\u{1ff0}', '\u{1ff1}'),
+    ('\u{1ff5}', '\u{1ff5}'),
+    ('\u{1fff}', '\u{1fff}'),
+    ('\u{200b}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{206f}'),
+    ('\u{2072}', '\u{2073}'),
+    ('\u{208f}', '\u{208f}'),
+    ('\u{209d}', '\u{209f}'),
+    ('\u{20c0}', '\u{20cf}'),
+    ('\u{20f1}', '\u{20ff}'),
+    ('\u{218c}', '\u{218f}'),
+    ('\u{2427}', '\u{243f}'),
+    ('\u{244b}', '\u{245f}'),
+    ('\u{2b74}', '\u{2b75}'),
+    ('\u{2b96}', '\u{2b96}'),
+    ('\u{2c2f}', '\u{2c2f}'),
+    ('\u{2c5f}', '\u{2c5f}'),
+    ('\u{2cf4}', '\u{2cf8}'),
+    ('\u{2d26}', '\u{2d26}'),
+    ('\u{2d28}', '\u{2d2c}'),
+    ('\u{2d2e}', '\u{2d2f}'),
+    ('\u{2d68}', '\u{2d6e}'),
+    ('\u{2d71}', '\u{2d7e}'),
+    ('\u{2d97}', '\u{2d9f}'),
+    ('\u{2da7}', '\u{2da7}'),
+    ('\u{2daf}', '\u{2daf}'),
+    ('\u{2db7}', '\u{2db7}'),
+    ('\u{2dbf}', '\u{2dbf}'),
+    ('\u{2dc7}', '\u{2dc7}'),
+    ('\u{2dcf}', '\u{2dcf}'),
+    ('\u{2dd7}', '\u{2dd7}'),
+    ('\u{2ddf}', '\u{2ddf}'),
+    ('\u{2e53}', '\u{2e7f}'),
+    ('\u{2e9a}', '\u{2e9a}'),
+    ('\u{2ef4}', '\u{2eff}'),
+    ('\u{2fd6}', '\u{2fef}'),
+    ('\u{2ffc}', '\u{2fff}'),
+    ('\u{3040}', '\u{3040}'),
+    ('\u{3097}', '\u{3098}'),
+    ('\u{3100}', '\u{3104}'),
+    ('\u{3130}', '\u{3130}'),
+    ('\u{318f}', '\u{318f}'),
+    ('\u{31e4}', '\u{31ef}'),
+    ('\u{321f}', '\u{321f}'),
+    ('\u{9ffd}', '\u{9fff}'),
+    ('\u{a48d}', '\u{a48f}'),
+    ('\u{a4c7}', '\u{a4cf}'),
+    ('\u{a62c}', '\u{a63f}'),
+    ('\u{a6f8}', '\u{a6ff}'),
+    ('\u{a7c0}', '\u{a7c1}'),
+    ('\u{a7cb}', '\u{a7f4}'),
+    ('\u{a82d}', '\u{a82f}'),
+    ('\u{a83a}', '\u{a83f}'),
+    ('\u{a878}', '\u{a87f}'),
+    ('\u{a8c6}', '\u{a8cd}'),
+    ('\u{a8da}', '\u{a8df}'),
+    ('\u{a954}', '\u{a95e}'),
+    ('\u{a97d}', '\u{a97f}'),
+    ('\u{a9ce}', '\u{a9ce}'),
+    ('\u{a9da}', '\u{a9dd}'),
+    ('\u{a9ff}', '\u{a9ff}'),
+    ('\u{aa37}', '\u{aa3f}'),
+    ('\u{aa4e}', '\u{aa4f}'),
+    ('\u{aa5a}', '\u{aa5b}'),
+    ('\u{aac3}', '\u{aada}'),
+    ('\u{aaf7}', '\u{ab00}'),
+    ('\u{ab07}', '\u{ab08}'),
+    ('\u{ab0f}', '\u{ab10}'),
+    ('\u{ab17}', '\u{ab1f}'),
+    ('\u{ab27}', '\u{ab27}'),
+    ('\u{ab2f}', '\u{ab2f}'),
+    ('\u{ab6c}', '\u{ab6f}'),
+    ('\u{abee}', '\u{abef}'),
+    ('\u{abfa}', '\u{abff}'),
+    ('\u{d7a4}', '\u{d7af}'),
+    ('\u{d7c7}', '\u{d7ca}'),
+    ('\u{d7fc}', '\u{f8ff}'),
+    ('\u{fa6e}', '\u{fa6f}'),
+    ('\u{fada}', '\u{faff}'),
+    ('\u{fb07}', '\u{fb12}'),
+    ('\u{fb18}', '\u{fb1c}'),
+    ('\u{fb37}', '\u{fb37}'),
+    ('\u{fb3d}', '\u{fb3d}'),
+    ('\u{fb3f}', '\u{fb3f}'),
+    ('\u{fb42}', '\u{fb42}'),
+    ('\u{fb45}', '\u{fb45}'),
+    ('\u{fbc2}', '\u{fbd2}'),
+    ('\u{fd40}', '\u{fd4f}'),
+    ('\u{fd90}', '\u{fd91}'),
+    ('\u{fdc8}', '\u{fdef}'),
+    ('\u{fdfe}', '\u{fdff}'),
+    ('\u{fe1a}', '\u{fe1f}'),
+    ('\u{fe53}', '\u{fe53}'),
+    ('\u{fe67}', '\u{fe67}'),
+    ('\u{fe6c}', '\u{fe6f}'),
+    ('\u{fe75}', '\u{fe75}'),
+    ('\u{fefd}', '\u{ff00}'),
+    ('\u{ffbf}', '\u{ffc1}'),
+    ('\u{ffc8}', '\u{ffc9}'),
+    ('\u{ffd0}', '\u{ffd1}'),
+    ('\u{ffd8}', '\u{ffd9}'),
+    ('\u{ffdd}', '\u{ffdf}'),
+    ('\u{ffe7}', '\u{ffe7}'),
+    ('\u{ffef}', '\u{fffb}'),
+    ('\u{fffe}', '\u{ffff}'),
+    ('\u{1000c}', '\u{1000c}'),
+    ('\u{10027}', '\u{10027}'),
+    ('\u{1003b}', '\u{1003b}'),
+    ('\u{1003e}', '\u{1003e}'),
+    ('\u{1004e}', '\u{1004f}'),
+    ('\u{1005e}', '\u{1007f}'),
+    ('\u{100fb}', '\u{100ff}'),
+    ('\u{10103}', '\u{10106}'),
+    ('\u{10134}', '\u{10136}'),
+    ('\u{1018f}', '\u{1018f}'),
+    ('\u{1019d}', '\u{1019f}'),
+    ('\u{101a1}', '\u{101cf}'),
+    ('\u{101fe}', '\u{1027f}'),
+    ('\u{1029d}', '\u{1029f}'),
+    ('\u{102d1}', '\u{102df}'),
+    ('\u{102fc}', '\u{102ff}'),
+    ('\u{10324}', '\u{1032c}'),
+    ('\u{1034b}', '\u{1034f}'),
+    ('\u{1037b}', '\u{1037f}'),
+    ('\u{1039e}', '\u{1039e}'),
+    ('\u{103c4}', '\u{103c7}'),
+    ('\u{103d6}', '\u{103ff}'),
+    ('\u{1049e}', '\u{1049f}'),
+    ('\u{104aa}', '\u{104af}'),
+    ('\u{104d4}', '\u{104d7}'),
+    ('\u{104fc}', '\u{104ff}'),
+    ('\u{10528}', '\u{1052f}'),
+    ('\u{10564}', '\u{1056e}'),
+    ('\u{10570}', '\u{105ff}'),
+    ('\u{10737}', '\u{1073f}'),
+    ('\u{10756}', '\u{1075f}'),
+    ('\u{10768}', '\u{107ff}'),
+    ('\u{10806}', '\u{10807}'),
+    ('\u{10809}', '\u{10809}'),
+    ('\u{10836}', '\u{10836}'),
+    ('\u{10839}', '\u{1083b}'),
+    ('\u{1083d}', '\u{1083e}'),
+    ('\u{10856}', '\u{10856}'),
+    ('\u{1089f}', '\u{108a6}'),
+    ('\u{108b0}', '\u{108df}'),
+    ('\u{108f3}', '\u{108f3}'),
+    ('\u{108f6}', '\u{108fa}'),
+    ('\u{1091c}', '\u{1091e}'),
+    ('\u{1093a}', '\u{1093e}'),
+    ('\u{10940}', '\u{1097f}'),
+    ('\u{109b8}', '\u{109bb}'),
+    ('\u{109d0}', '\u{109d1}'),
+    ('\u{10a04}', '\u{10a04}'),
+    ('\u{10a07}', '\u{10a0b}'),
+    ('\u{10a14}', '\u{10a14}'),
+    ('\u{10a18}', '\u{10a18}'),
+    ('\u{10a36}', '\u{10a37}'),
+    ('\u{10a3b}', '\u{10a3e}'),
+    ('\u{10a49}', '\u{10a4f}'),
+    ('\u{10a59}', '\u{10a5f}'),
+    ('\u{10aa0}', '\u{10abf}'),
+    ('\u{10ae7}', '\u{10aea}'),
+    ('\u{10af7}', '\u{10aff}'),
+    ('\u{10b36}', '\u{10b38}'),
+    ('\u{10b56}', '\u{10b57}'),
+    ('\u{10b73}', '\u{10b77}'),
+    ('\u{10b92}', '\u{10b98}'),
+    ('\u{10b9d}', '\u{10ba8}'),
+    ('\u{10bb0}', '\u{10bff}'),
+    ('\u{10c49}', '\u{10c7f}'),
+    ('\u{10cb3}', '\u{10cbf}'),
+    ('\u{10cf3}', '\u{10cf9}'),
+    ('\u{10d28}', '\u{10d2f}'),
+    ('\u{10d3a}', '\u{10e5f}'),
+    ('\u{10e7f}', '\u{10e7f}'),
+    ('\u{10eaa}', '\u{10eaa}'),
+    ('\u{10eae}', '\u{10eaf}'),
+    ('\u{10eb2}', '\u{10eff}'),
+    ('\u{10f28}', '\u{10f2f}'),
+    ('\u{10f5a}', '\u{10faf}'),
+    ('\u{10fcc}', '\u{10fdf}'),
+    ('\u{10ff7}', '\u{10fff}'),
+    ('\u{1104e}', '\u{11051}'),
+    ('\u{11070}', '\u{1107e}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110c2}', '\u{110cf}'),
+    ('\u{110e9}', '\u{110ef}'),
+    ('\u{110fa}', '\u{110ff}'),
+    ('\u{11135}', '\u{11135}'),
+    ('\u{11148}', '\u{1114f}'),
+    ('\u{11177}', '\u{1117f}'),
+    ('\u{111e0}', '\u{111e0}'),
+    ('\u{111f5}', '\u{111ff}'),
+    ('\u{11212}', '\u{11212}'),
+    ('\u{1123f}', '\u{1127f}'),
+    ('\u{11287}', '\u{11287}'),
+    ('\u{11289}', '\u{11289}'),
+    ('\u{1128e}', '\u{1128e}'),
+    ('\u{1129e}', '\u{1129e}'),
+    ('\u{112aa}', '\u{112af}'),
+    ('\u{112eb}', '\u{112ef}'),
+    ('\u{112fa}', '\u{112ff}'),
+    ('\u{11304}', '\u{11304}'),
+    ('\u{1130d}', '\u{1130e}'),
+    ('\u{11311}', '\u{11312}'),
+    ('\u{11329}', '\u{11329}'),
+    ('\u{11331}', '\u{11331}'),
+    ('\u{11334}', '\u{11334}'),
+    ('\u{1133a}', '\u{1133a}'),
+    ('\u{11345}', '\u{11346}'),
+    ('\u{11349}', '\u{1134a}'),
+    ('\u{1134e}', '\u{1134f}'),
+    ('\u{11351}', '\u{11356}'),
+    ('\u{11358}', '\u{1135c}'),
+    ('\u{11364}', '\u{11365}'),
+    ('\u{1136d}', '\u{1136f}'),
+    ('\u{11375}', '\u{113ff}'),
+    ('\u{1145c}', '\u{1145c}'),
+    ('\u{11462}', '\u{1147f}'),
+    ('\u{114c8}', '\u{114cf}'),
+    ('\u{114da}', '\u{1157f}'),
+    ('\u{115b6}', '\u{115b7}'),
+    ('\u{115de}', '\u{115ff}'),
+    ('\u{11645}', '\u{1164f}'),
+    ('\u{1165a}', '\u{1165f}'),
+    ('\u{1166d}', '\u{1167f}'),
+    ('\u{116b9}', '\u{116bf}'),
+    ('\u{116ca}', '\u{116ff}'),
+    ('\u{1171b}', '\u{1171c}'),
+    ('\u{1172c}', '\u{1172f}'),
+    ('\u{11740}', '\u{117ff}'),
+    ('\u{1183c}', '\u{1189f}'),
+    ('\u{118f3}', '\u{118fe}'),
+    ('\u{11907}', '\u{11908}'),
+    ('\u{1190a}', '\u{1190b}'),
+    ('\u{11914}', '\u{11914}'),
+    ('\u{11917}', '\u{11917}'),
+    ('\u{11936}', '\u{11936}'),
+    ('\u{11939}', '\u{1193a}'),
+    ('\u{11947}', '\u{1194f}'),
+    ('\u{1195a}', '\u{1199f}'),
+    ('\u{119a8}', '\u{119a9}'),
+    ('\u{119d8}', '\u{119d9}'),
+    ('\u{119e5}', '\u{119ff}'),
+    ('\u{11a48}', '\u{11a4f}'),
+    ('\u{11aa3}', '\u{11abf}'),
+    ('\u{11af9}', '\u{11bff}'),
+    ('\u{11c09}', '\u{11c09}'),
+    ('\u{11c37}', '\u{11c37}'),
+    ('\u{11c46}', '\u{11c4f}'),
+    ('\u{11c6d}', '\u{11c6f}'),
+    ('\u{11c90}', '\u{11c91}'),
+    ('\u{11ca8}', '\u{11ca8}'),
+    ('\u{11cb7}', '\u{11cff}'),
+    ('\u{11d07}', '\u{11d07}'),
+    ('\u{11d0a}', '\u{11d0a}'),
+    ('\u{11d37}', '\u{11d39}'),
+    ('\u{11d3b}', '\u{11d3b}'),
+    ('\u{11d3e}', '\u{11d3e}'),
+    ('\u{11d48}', '\u{11d4f}'),
+    ('\u{11d5a}', '\u{11d5f}'),
+    ('\u{11d66}', '\u{11d66}'),
+    ('\u{11d69}', '\u{11d69}'),
+    ('\u{11d8f}', '\u{11d8f}'),
+    ('\u{11d92}', '\u{11d92}'),
+    ('\u{11d99}', '\u{11d9f}'),
+    ('\u{11daa}', '\u{11edf}'),
+    ('\u{11ef9}', '\u{11faf}'),
+    ('\u{11fb1}', '\u{11fbf}'),
+    ('\u{11ff2}', '\u{11ffe}'),
+    ('\u{1239a}', '\u{123ff}'),
+    ('\u{1246f}', '\u{1246f}'),
+    ('\u{12475}', '\u{1247f}'),
+    ('\u{12544}', '\u{12fff}'),
+    ('\u{1342f}', '\u{143ff}'),
+    ('\u{14647}', '\u{167ff}'),
+    ('\u{16a39}', '\u{16a3f}'),
+    ('\u{16a5f}', '\u{16a5f}'),
+    ('\u{16a6a}', '\u{16a6d}'),
+    ('\u{16a70}', '\u{16acf}'),
+    ('\u{16aee}', '\u{16aef}'),
+    ('\u{16af6}', '\u{16aff}'),
+    ('\u{16b46}', '\u{16b4f}'),
+    ('\u{16b5a}', '\u{16b5a}'),
+    ('\u{16b62}', '\u{16b62}'),
+    ('\u{16b78}', '\u{16b7c}'),
+    ('\u{16b90}', '\u{16e3f}'),
+    ('\u{16e9b}', '\u{16eff}'),
+    ('\u{16f4b}', '\u{16f4e}'),
+    ('\u{16f88}', '\u{16f8e}'),
+    ('\u{16fa0}', '\u{16fdf}'),
+    ('\u{16fe5}', '\u{16fef}'),
+    ('\u{16ff2}', '\u{16fff}'),
+    ('\u{187f8}', '\u{187ff}'),
+    ('\u{18cd6}', '\u{18cff}'),
+    ('\u{18d09}', '\u{1afff}'),
+    ('\u{1b11f}', '\u{1b14f}'),
+    ('\u{1b153}', '\u{1b163}'),
+    ('\u{1b168}', '\u{1b16f}'),
+    ('\u{1b2fc}', '\u{1bbff}'),
+    ('\u{1bc6b}', '\u{1bc6f}'),
+    ('\u{1bc7d}', '\u{1bc7f}'),
+    ('\u{1bc89}', '\u{1bc8f}'),
+    ('\u{1bc9a}', '\u{1bc9b}'),
+    ('\u{1bca0}', '\u{1cfff}'),
+    ('\u{1d0f6}', '\u{1d0ff}'),
+    ('\u{1d127}', '\u{1d128}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{1d1e9}', '\u{1d1ff}'),
+    ('\u{1d246}', '\u{1d2df}'),
+    ('\u{1d2f4}', '\u{1d2ff}'),
+    ('\u{1d357}', '\u{1d35f}'),
+    ('\u{1d379}', '\u{1d3ff}'),
+    ('\u{1d455}', '\u{1d455}'),
+    ('\u{1d49d}', '\u{1d49d}'),
+    ('\u{1d4a0}', '\u{1d4a1}'),
+    ('\u{1d4a3}', '\u{1d4a4}'),
+    ('\u{1d4a7}', '\u{1d4a8}'),
+    ('\u{1d4ad}', '\u{1d4ad}'),
+    ('\u{1d4ba}', '\u{1d4ba}'),
+    ('\u{1d4bc}', '\u{1d4bc}'),
+    ('\u{1d4c4}', '\u{1d4c4}'),
+    ('\u{1d506}', '\u{1d506}'),
+    ('\u{1d50b}', '\u{1d50c}'),
+    ('\u{1d515}', '\u{1d515}'),
+    ('\u{1d51d}', '\u{1d51d}'),
+    ('\u{1d53a}', '\u{1d53a}'),
+    ('\u{1d53f}', '\u{1d53f}'),
+    ('\u{1d545}', '\u{1d545}'),
+    ('\u{1d547}', '\u{1d549}'),
+    ('\u{1d551}', '\u{1d551}'),
+    ('\u{1d6a6}', '\u{1d6a7}'),
+    ('\u{1d7cc}', '\u{1d7cd}'),
+    ('\u{1da8c}', '\u{1da9a}'),
+    ('\u{1daa0}', '\u{1daa0}'),
+    ('\u{1dab0}', '\u{1dfff}'),
+    ('\u{1e007}', '\u{1e007}'),
+    ('\u{1e019}', '\u{1e01a}'),
+    ('\u{1e022}', '\u{1e022}'),
+    ('\u{1e025}', '\u{1e025}'),
+    ('\u{1e02b}', '\u{1e0ff}'),
+    ('\u{1e12d}', '\u{1e12f}'),
+    ('\u{1e13e}', '\u{1e13f}'),
+    ('\u{1e14a}', '\u{1e14d}'),
+    ('\u{1e150}', '\u{1e2bf}'),
+    ('\u{1e2fa}', '\u{1e2fe}'),
+    ('\u{1e300}', '\u{1e7ff}'),
+    ('\u{1e8c5}', '\u{1e8c6}'),
+    ('\u{1e8d7}', '\u{1e8ff}'),
+    ('\u{1e94c}', '\u{1e94f}'),
+    ('\u{1e95a}', '\u{1e95d}'),
+    ('\u{1e960}', '\u{1ec70}'),
+    ('\u{1ecb5}', '\u{1ed00}'),
+    ('\u{1ed3e}', '\u{1edff}'),
+    ('\u{1ee04}', '\u{1ee04}'),
+    ('\u{1ee20}', '\u{1ee20}'),
+    ('\u{1ee23}', '\u{1ee23}'),
+    ('\u{1ee25}', '\u{1ee26}'),
+    ('\u{1ee28}', '\u{1ee28}'),
+    ('\u{1ee33}', '\u{1ee33}'),
+    ('\u{1ee38}', '\u{1ee38}'),
+    ('\u{1ee3a}', '\u{1ee3a}'),
+    ('\u{1ee3c}', '\u{1ee41}'),
+    ('\u{1ee43}', '\u{1ee46}'),
+    ('\u{1ee48}', '\u{1ee48}'),
+    ('\u{1ee4a}', '\u{1ee4a}'),
+    ('\u{1ee4c}', '\u{1ee4c}'),
+    ('\u{1ee50}', '\u{1ee50}'),
+    ('\u{1ee53}', '\u{1ee53}'),
+    ('\u{1ee55}', '\u{1ee56}'),
+    ('\u{1ee58}', '\u{1ee58}'),
+    ('\u{1ee5a}', '\u{1ee5a}'),
+    ('\u{1ee5c}', '\u{1ee5c}'),
+    ('\u{1ee5e}', '\u{1ee5e}'),
+    ('\u{1ee60}', '\u{1ee60}'),
+    ('\u{1ee63}', '\u{1ee63}'),
+    ('\u{1ee65}', '\u{1ee66}'),
+    ('\u{1ee6b}', '\u{1ee6b}'),
+    ('\u{1ee73}', '\u{1ee73}'),
+    ('\u{1ee78}', '\u{1ee78}'),
+    ('\u{1ee7d}', '\u{1ee7d}'),
+    ('\u{1ee7f}', '\u{1ee7f}'),
+    ('\u{1ee8a}', '\u{1ee8a}'),
+    ('\u{1ee9c}', '\u{1eea0}'),
+    ('\u{1eea4}', '\u{1eea4}'),
+    ('\u{1eeaa}', '\u{1eeaa}'),
+    ('\u{1eebc}', '\u{1eeef}'),
+    ('\u{1eef2}', '\u{1efff}'),
+    ('\u{1f02c}', '\u{1f02f}'),
+    ('\u{1f094}', '\u{1f09f}'),
+    ('\u{1f0af}', '\u{1f0b0}'),
+    ('\u{1f0c0}', '\u{1f0c0}'),
+    ('\u{1f0d0}', '\u{1f0d0}'),
+    ('\u{1f0f6}', '\u{1f0ff}'),
+    ('\u{1f1ae}', '\u{1f1e5}'),
+    ('\u{1f203}', '\u{1f20f}'),
+    ('\u{1f23c}', '\u{1f23f}'),
+    ('\u{1f249}', '\u{1f24f}'),
+    ('\u{1f252}', '\u{1f25f}'),
+    ('\u{1f266}', '\u{1f2ff}'),
+    ('\u{1f6d8}', '\u{1f6df}'),
+    ('\u{1f6ed}', '\u{1f6ef}'),
+    ('\u{1f6fd}', '\u{1f6ff}'),
+    ('\u{1f774}', '\u{1f77f}'),
+    ('\u{1f7d9}', '\u{1f7df}'),
+    ('\u{1f7ec}', '\u{1f7ff}'),
+    ('\u{1f80c}', '\u{1f80f}'),
+    ('\u{1f848}', '\u{1f84f}'),
+    ('\u{1f85a}', '\u{1f85f}'),
+    ('\u{1f888}', '\u{1f88f}'),
+    ('\u{1f8ae}', '\u{1f8af}'),
+    ('\u{1f8b2}', '\u{1f8ff}'),
+    ('\u{1f979}', '\u{1f979}'),
+    ('\u{1f9cc}', '\u{1f9cc}'),
+    ('\u{1fa54}', '\u{1fa5f}'),
+    ('\u{1fa6e}', '\u{1fa6f}'),
+    ('\u{1fa75}', '\u{1fa77}'),
+    ('\u{1fa7b}', '\u{1fa7f}'),
+    ('\u{1fa87}', '\u{1fa8f}'),
+    ('\u{1faa9}', '\u{1faaf}'),
+    ('\u{1fab7}', '\u{1fabf}'),
+    ('\u{1fac3}', '\u{1facf}'),
+    ('\u{1fad7}', '\u{1faff}'),
+    ('\u{1fb93}', '\u{1fb93}'),
+    ('\u{1fbcb}', '\u{1fbef}'),
+    ('\u{1fbfa}', '\u{1ffff}'),
+    ('\u{2a6de}', '\u{2a6ff}'),
+    ('\u{2b735}', '\u{2b73f}'),
+    ('\u{2b81e}', '\u{2b81f}'),
+    ('\u{2cea2}', '\u{2ceaf}'),
+    ('\u{2ebe1}', '\u{2f7ff}'),
+    ('\u{2fa1e}', '\u{2ffff}'),
+    ('\u{3134b}', '\u{e00ff}'),
+    ('\u{e01f0}', '\u{10ffff}'),
+];
+
+pub const OTHER_LETTER: &'static [(char, char)] = &[
+    ('ª', 'ª'),
+    ('º', 'º'),
+    ('ƻ', 'ƻ'),
+    ('ǀ', 'ǃ'),
+    ('ʔ', 'ʔ'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('ؠ', 'ؿ'),
+    ('ف', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॲ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ก', 'ะ'),
+    ('า', 'ำ'),
+    ('เ', 'ๅ'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'ຳ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('က', 'ဪ'),
+    ('ဿ', 'ဿ'),
+    ('ၐ', 'ၕ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၡ'),
+    ('ၥ', 'ၦ'),
+    ('ၮ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႎ', 'ႎ'),
+    ('ᄀ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛱ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡂ'),
+    ('ᡄ', 'ᡸ'),
+    ('ᢀ', 'ᢄ'),
+    ('ᢇ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᨠ', 'ᩔ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱷ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ℵ', 'ℸ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('〆', '〆'),
+    ('〼', '〼'),
+    ('ぁ', 'ゖ'),
+    ('ゟ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ヿ', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꀔ'),
+    ('ꀖ', 'ꒌ'),
+    ('ꓐ', 'ꓷ'),
+    ('ꔀ', 'ꘋ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('ꙮ', 'ꙮ'),
+    ('ꚠ', 'ꛥ'),
+    ('ꞏ', 'ꞏ'),
+    ('ꟷ', 'ꟷ'),
+    ('ꟻ', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧠ', 'ꧤ'),
+    ('ꧧ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩠ', 'ꩯ'),
+    ('ꩱ', 'ꩶ'),
+    ('ꩺ', 'ꩺ'),
+    ('ꩾ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫜ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫲ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꯀ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('ヲ', 'ッ'),
+    ('ア', 'ン'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍀'),
+    ('𐍂', '𐍉'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐑐', '𐒝'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('𑠀', '𑠫'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𞄀', '𞄬'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const OTHER_NUMBER: &'static [(char, char)] = &[
+    ('²', '³'),
+    ('¹', '¹'),
+    ('¼', '¾'),
+    ('৴', '৹'),
+    ('୲', '୷'),
+    ('௰', '௲'),
+    ('౸', '౾'),
+    ('൘', '൞'),
+    ('൰', '൸'),
+    ('༪', '༳'),
+    ('፩', '፼'),
+    ('៰', '៹'),
+    ('᧚', '᧚'),
+    ('⁰', '⁰'),
+    ('⁴', '⁹'),
+    ('₀', '₉'),
+    ('⅐', '⅟'),
+    ('↉', '↉'),
+    ('①', '⒛'),
+    ('⓪', '⓿'),
+    ('❶', '➓'),
+    ('⳽', '⳽'),
+    ('㆒', '㆕'),
+    ('㈠', '㈩'),
+    ('㉈', '㉏'),
+    ('㉑', '㉟'),
+    ('㊀', '㊉'),
+    ('㊱', '㊿'),
+    ('꠰', '꠵'),
+    ('𐄇', '𐄳'),
+    ('𐅵', '𐅸'),
+    ('𐆊', '𐆋'),
+    ('𐋡', '𐋻'),
+    ('𐌠', '𐌣'),
+    ('𐡘', '𐡟'),
+    ('𐡹', '𐡿'),
+    ('𐢧', '𐢯'),
+    ('𐣻', '𐣿'),
+    ('𐤖', '𐤛'),
+    ('𐦼', '𐦽'),
+    ('𐧀', '𐧏'),
+    ('𐧒', '𐧿'),
+    ('𐩀', '𐩈'),
+    ('𐩽', '𐩾'),
+    ('𐪝', '𐪟'),
+    ('𐫫', '𐫯'),
+    ('𐭘', '𐭟'),
+    ('𐭸', '𐭿'),
+    ('𐮩', '𐮯'),
+    ('𐳺', '𐳿'),
+    ('𐹠', '𐹾'),
+    ('𐼝', '𐼦'),
+    ('𐽑', '𐽔'),
+    ('\u{10fc5}', '\u{10fcb}'),
+    ('𑁒', '𑁥'),
+    ('𑇡', '𑇴'),
+    ('𑜺', '𑜻'),
+    ('𑣪', '𑣲'),
+    ('𑱚', '𑱬'),
+    ('𑿀', '𑿔'),
+    ('𖭛', '𖭡'),
+    ('𖺀', '𖺖'),
+    ('𝋠', '𝋳'),
+    ('𝍠', '𝍸'),
+    ('𞣇', '𞣏'),
+    ('𞱱', '𞲫'),
+    ('𞲭', '𞲯'),
+    ('𞲱', '𞲴'),
+    ('𞴁', '𞴭'),
+    ('𞴯', '𞴽'),
+    ('🄀', '🄌'),
+];
+
+pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[
+    ('!', '#'),
+    ('%', '\''),
+    ('*', '*'),
+    (',', ','),
+    ('.', '/'),
+    (':', ';'),
+    ('?', '@'),
+    ('\\', '\\'),
+    ('¡', '¡'),
+    ('§', '§'),
+    ('¶', '·'),
+    ('¿', '¿'),
+    (';', ';'),
+    ('·', '·'),
+    ('՚', '՟'),
+    ('։', '։'),
+    ('׀', '׀'),
+    ('׃', '׃'),
+    ('׆', '׆'),
+    ('׳', '״'),
+    ('؉', '؊'),
+    ('،', '؍'),
+    ('؛', '؛'),
+    ('؞', '؟'),
+    ('٪', '٭'),
+    ('۔', '۔'),
+    ('܀', '܍'),
+    ('߷', '߹'),
+    ('࠰', '࠾'),
+    ('࡞', '࡞'),
+    ('।', '॥'),
+    ('॰', '॰'),
+    ('৽', '৽'),
+    ('੶', '੶'),
+    ('૰', '૰'),
+    ('౷', '౷'),
+    ('಄', '಄'),
+    ('෴', '෴'),
+    ('๏', '๏'),
+    ('๚', '๛'),
+    ('༄', '༒'),
+    ('༔', '༔'),
+    ('྅', '྅'),
+    ('࿐', '࿔'),
+    ('࿙', '࿚'),
+    ('၊', '၏'),
+    ('჻', '჻'),
+    ('፠', '፨'),
+    ('᙮', '᙮'),
+    ('᛫', '᛭'),
+    ('᜵', '᜶'),
+    ('។', '៖'),
+    ('៘', '៚'),
+    ('᠀', '᠅'),
+    ('᠇', '᠊'),
+    ('᥄', '᥅'),
+    ('᨞', '᨟'),
+    ('᪠', '᪦'),
+    ('᪨', '᪭'),
+    ('᭚', '᭠'),
+    ('᯼', '᯿'),
+    ('᰻', '᰿'),
+    ('᱾', '᱿'),
+    ('᳀', '᳇'),
+    ('᳓', '᳓'),
+    ('‖', '‗'),
+    ('†', '‧'),
+    ('‰', '‸'),
+    ('※', '‾'),
+    ('⁁', '⁃'),
+    ('⁇', '⁑'),
+    ('⁓', '⁓'),
+    ('⁕', '⁞'),
+    ('⳹', '⳼'),
+    ('⳾', '⳿'),
+    ('⵰', '⵰'),
+    ('⸀', '⸁'),
+    ('⸆', '⸈'),
+    ('⸋', '⸋'),
+    ('⸎', '⸖'),
+    ('⸘', '⸙'),
+    ('⸛', '⸛'),
+    ('⸞', '⸟'),
+    ('⸪', '⸮'),
+    ('⸰', '⸹'),
+    ('⸼', '⸿'),
+    ('⹁', '⹁'),
+    ('⹃', '⹏'),
+    ('\u{2e52}', '\u{2e52}'),
+    ('、', '〃'),
+    ('〽', '〽'),
+    ('・', '・'),
+    ('꓾', '꓿'),
+    ('꘍', '꘏'),
+    ('꙳', '꙳'),
+    ('꙾', '꙾'),
+    ('꛲', '꛷'),
+    ('꡴', '꡷'),
+    ('꣎', '꣏'),
+    ('꣸', '꣺'),
+    ('꣼', '꣼'),
+    ('꤮', '꤯'),
+    ('꥟', '꥟'),
+    ('꧁', '꧍'),
+    ('꧞', '꧟'),
+    ('꩜', '꩟'),
+    ('꫞', '꫟'),
+    ('꫰', '꫱'),
+    ('꯫', '꯫'),
+    ('︐', '︖'),
+    ('︙', '︙'),
+    ('︰', '︰'),
+    ('﹅', '﹆'),
+    ('﹉', '﹌'),
+    ('﹐', '﹒'),
+    ('﹔', '﹗'),
+    ('﹟', '﹡'),
+    ('﹨', '﹨'),
+    ('﹪', '﹫'),
+    ('!', '#'),
+    ('%', '''),
+    ('*', '*'),
+    (',', ','),
+    ('.', '/'),
+    (':', ';'),
+    ('?', '@'),
+    ('\', '\'),
+    ('。', '。'),
+    ('、', '・'),
+    ('𐄀', '𐄂'),
+    ('𐎟', '𐎟'),
+    ('𐏐', '𐏐'),
+    ('𐕯', '𐕯'),
+    ('𐡗', '𐡗'),
+    ('𐤟', '𐤟'),
+    ('𐤿', '𐤿'),
+    ('𐩐', '𐩘'),
+    ('𐩿', '𐩿'),
+    ('𐫰', '𐫶'),
+    ('𐬹', '𐬿'),
+    ('𐮙', '𐮜'),
+    ('𐽕', '𐽙'),
+    ('𑁇', '𑁍'),
+    ('𑂻', '𑂼'),
+    ('𑂾', '𑃁'),
+    ('𑅀', '𑅃'),
+    ('𑅴', '𑅵'),
+    ('𑇅', '𑇈'),
+    ('𑇍', '𑇍'),
+    ('𑇛', '𑇛'),
+    ('𑇝', '𑇟'),
+    ('𑈸', '𑈽'),
+    ('𑊩', '𑊩'),
+    ('𑑋', '𑑏'),
+    ('\u{1145a}', '𑑛'),
+    ('𑑝', '𑑝'),
+    ('𑓆', '𑓆'),
+    ('𑗁', '𑗗'),
+    ('𑙁', '𑙃'),
+    ('𑙠', '𑙬'),
+    ('𑜼', '𑜾'),
+    ('𑠻', '𑠻'),
+    ('\u{11944}', '\u{11946}'),
+    ('𑧢', '𑧢'),
+    ('𑨿', '𑩆'),
+    ('𑪚', '𑪜'),
+    ('𑪞', '𑪢'),
+    ('𑱁', '𑱅'),
+    ('𑱰', '𑱱'),
+    ('𑻷', '𑻸'),
+    ('𑿿', '𑿿'),
+    ('𒑰', '𒑴'),
+    ('𖩮', '𖩯'),
+    ('𖫵', '𖫵'),
+    ('𖬷', '𖬻'),
+    ('𖭄', '𖭄'),
+    ('𖺗', '𖺚'),
+    ('𖿢', '𖿢'),
+    ('𛲟', '𛲟'),
+    ('𝪇', '𝪋'),
+    ('𞥞', '𞥟'),
+];
+
+pub const OTHER_SYMBOL: &'static [(char, char)] = &[
+    ('¦', '¦'),
+    ('©', '©'),
+    ('®', '®'),
+    ('°', '°'),
+    ('҂', '҂'),
+    ('֍', '֎'),
+    ('؎', '؏'),
+    ('۞', '۞'),
+    ('۩', '۩'),
+    ('۽', '۾'),
+    ('߶', '߶'),
+    ('৺', '৺'),
+    ('୰', '୰'),
+    ('௳', '௸'),
+    ('௺', '௺'),
+    ('౿', '౿'),
+    ('൏', '൏'),
+    ('൹', '൹'),
+    ('༁', '༃'),
+    ('༓', '༓'),
+    ('༕', '༗'),
+    ('༚', '༟'),
+    ('༴', '༴'),
+    ('༶', '༶'),
+    ('༸', '༸'),
+    ('྾', '࿅'),
+    ('࿇', '࿌'),
+    ('࿎', '࿏'),
+    ('࿕', '࿘'),
+    ('႞', '႟'),
+    ('᎐', '᎙'),
+    ('᙭', '᙭'),
+    ('᥀', '᥀'),
+    ('᧞', '᧿'),
+    ('᭡', '᭪'),
+    ('᭴', '᭼'),
+    ('℀', '℁'),
+    ('℃', '℆'),
+    ('℈', '℉'),
+    ('℔', '℔'),
+    ('№', '℗'),
+    ('℞', '℣'),
+    ('℥', '℥'),
+    ('℧', '℧'),
+    ('℩', '℩'),
+    ('℮', '℮'),
+    ('℺', '℻'),
+    ('⅊', '⅊'),
+    ('⅌', '⅍'),
+    ('⅏', '⅏'),
+    ('↊', '↋'),
+    ('↕', '↙'),
+    ('↜', '↟'),
+    ('↡', '↢'),
+    ('↤', '↥'),
+    ('↧', '↭'),
+    ('↯', '⇍'),
+    ('⇐', '⇑'),
+    ('⇓', '⇓'),
+    ('⇕', '⇳'),
+    ('⌀', '⌇'),
+    ('⌌', '⌟'),
+    ('⌢', '⌨'),
+    ('⌫', '⍻'),
+    ('⍽', '⎚'),
+    ('⎴', '⏛'),
+    ('⏢', '␦'),
+    ('⑀', '⑊'),
+    ('⒜', 'ⓩ'),
+    ('─', '▶'),
+    ('▸', '◀'),
+    ('◂', '◷'),
+    ('☀', '♮'),
+    ('♰', '❧'),
+    ('➔', '➿'),
+    ('⠀', '⣿'),
+    ('⬀', '⬯'),
+    ('⭅', '⭆'),
+    ('⭍', '⭳'),
+    ('⭶', '⮕'),
+    ('\u{2b97}', '⯿'),
+    ('⳥', '⳪'),
+    ('\u{2e50}', '\u{2e51}'),
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('⿰', '⿻'),
+    ('〄', '〄'),
+    ('〒', '〓'),
+    ('〠', '〠'),
+    ('〶', '〷'),
+    ('〾', '〿'),
+    ('㆐', '㆑'),
+    ('㆖', '㆟'),
+    ('㇀', '㇣'),
+    ('㈀', '㈞'),
+    ('㈪', '㉇'),
+    ('㉐', '㉐'),
+    ('㉠', '㉿'),
+    ('㊊', '㊰'),
+    ('㋀', '㏿'),
+    ('䷀', '䷿'),
+    ('꒐', '꓆'),
+    ('꠨', '꠫'),
+    ('꠶', '꠷'),
+    ('꠹', '꠹'),
+    ('꩷', '꩹'),
+    ('﷽', '﷽'),
+    ('¦', '¦'),
+    ('│', '│'),
+    ('■', '○'),
+    ('', '�'),
+    ('𐄷', '𐄿'),
+    ('𐅹', '𐆉'),
+    ('𐆌', '𐆎'),
+    ('𐆐', '\u{1019c}'),
+    ('𐆠', '𐆠'),
+    ('𐇐', '𐇼'),
+    ('𐡷', '𐡸'),
+    ('𐫈', '𐫈'),
+    ('𑜿', '𑜿'),
+    ('𑿕', '𑿜'),
+    ('𑿡', '𑿱'),
+    ('𖬼', '𖬿'),
+    ('𖭅', '𖭅'),
+    ('𛲜', '𛲜'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄩', '𝅘𝅥𝅲'),
+    ('𝅪', '𝅬'),
+    ('𝆃', '𝆄'),
+    ('𝆌', '𝆩'),
+    ('𝆮', '𝇨'),
+    ('𝈀', '𝉁'),
+    ('𝉅', '𝉅'),
+    ('𝌀', '𝍖'),
+    ('𝠀', '𝧿'),
+    ('𝨷', '𝨺'),
+    ('𝩭', '𝩴'),
+    ('𝩶', '𝪃'),
+    ('𝪅', '𝪆'),
+    ('𞅏', '𞅏'),
+    ('𞲬', '𞲬'),
+    ('𞴮', '𞴮'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+    ('🂠', '🂮'),
+    ('🂱', '🂿'),
+    ('🃁', '🃏'),
+    ('🃑', '🃵'),
+    ('\u{1f10d}', '\u{1f1ad}'),
+    ('🇦', '🈂'),
+    ('🈐', '🈻'),
+    ('🉀', '🉈'),
+    ('🉐', '🉑'),
+    ('🉠', '🉥'),
+    ('🌀', '🏺'),
+    ('🐀', '\u{1f6d7}'),
+    ('🛠', '🛬'),
+    ('🛰', '\u{1f6fc}'),
+    ('🜀', '🝳'),
+    ('🞀', '🟘'),
+    ('🟠', '🟫'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('🤀', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🩓'),
+    ('🩠', '🩭'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+];
+
+pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] =
+    &[('\u{2029}', '\u{2029}')];
+
+pub const PRIVATE_USE: &'static [(char, char)] = &[
+    ('\u{e000}', '\u{f8ff}'),
+    ('\u{f0000}', '\u{ffffd}'),
+    ('\u{100000}', '\u{10fffd}'),
+];
+
+pub const PUNCTUATION: &'static [(char, char)] = &[
+    ('!', '#'),
+    ('%', '*'),
+    (',', '/'),
+    (':', ';'),
+    ('?', '@'),
+    ('[', ']'),
+    ('_', '_'),
+    ('{', '{'),
+    ('}', '}'),
+    ('¡', '¡'),
+    ('§', '§'),
+    ('«', '«'),
+    ('¶', '·'),
+    ('»', '»'),
+    ('¿', '¿'),
+    (';', ';'),
+    ('·', '·'),
+    ('՚', '՟'),
+    ('։', '֊'),
+    ('־', '־'),
+    ('׀', '׀'),
+    ('׃', '׃'),
+    ('׆', '׆'),
+    ('׳', '״'),
+    ('؉', '؊'),
+    ('،', '؍'),
+    ('؛', '؛'),
+    ('؞', '؟'),
+    ('٪', '٭'),
+    ('۔', '۔'),
+    ('܀', '܍'),
+    ('߷', '߹'),
+    ('࠰', '࠾'),
+    ('࡞', '࡞'),
+    ('।', '॥'),
+    ('॰', '॰'),
+    ('৽', '৽'),
+    ('੶', '੶'),
+    ('૰', '૰'),
+    ('౷', '౷'),
+    ('಄', '಄'),
+    ('෴', '෴'),
+    ('๏', '๏'),
+    ('๚', '๛'),
+    ('༄', '༒'),
+    ('༔', '༔'),
+    ('༺', '༽'),
+    ('྅', '྅'),
+    ('࿐', '࿔'),
+    ('࿙', '࿚'),
+    ('၊', '၏'),
+    ('჻', '჻'),
+    ('፠', '፨'),
+    ('᐀', '᐀'),
+    ('᙮', '᙮'),
+    ('᚛', '᚜'),
+    ('᛫', '᛭'),
+    ('᜵', '᜶'),
+    ('។', '៖'),
+    ('៘', '៚'),
+    ('᠀', '᠊'),
+    ('᥄', '᥅'),
+    ('᨞', '᨟'),
+    ('᪠', '᪦'),
+    ('᪨', '᪭'),
+    ('᭚', '᭠'),
+    ('᯼', '᯿'),
+    ('᰻', '᰿'),
+    ('᱾', '᱿'),
+    ('᳀', '᳇'),
+    ('᳓', '᳓'),
+    ('‐', '‧'),
+    ('‰', '⁃'),
+    ('⁅', '⁑'),
+    ('⁓', '⁞'),
+    ('⁽', '⁾'),
+    ('₍', '₎'),
+    ('⌈', '⌋'),
+    ('⟨', '⟩'),
+    ('❨', '❵'),
+    ('⟅', '⟆'),
+    ('⟦', '⟯'),
+    ('⦃', '⦘'),
+    ('⧘', '⧛'),
+    ('⧼', '⧽'),
+    ('⳹', '⳼'),
+    ('⳾', '⳿'),
+    ('⵰', '⵰'),
+    ('⸀', '⸮'),
+    ('⸰', '⹏'),
+    ('\u{2e52}', '\u{2e52}'),
+    ('、', '〃'),
+    ('〈', '】'),
+    ('〔', '〟'),
+    ('〰', '〰'),
+    ('〽', '〽'),
+    ('゠', '゠'),
+    ('・', '・'),
+    ('꓾', '꓿'),
+    ('꘍', '꘏'),
+    ('꙳', '꙳'),
+    ('꙾', '꙾'),
+    ('꛲', '꛷'),
+    ('꡴', '꡷'),
+    ('꣎', '꣏'),
+    ('꣸', '꣺'),
+    ('꣼', '꣼'),
+    ('꤮', '꤯'),
+    ('꥟', '꥟'),
+    ('꧁', '꧍'),
+    ('꧞', '꧟'),
+    ('꩜', '꩟'),
+    ('꫞', '꫟'),
+    ('꫰', '꫱'),
+    ('꯫', '꯫'),
+    ('﴾', '﴿'),
+    ('︐', '︙'),
+    ('︰', '﹒'),
+    ('﹔', '﹡'),
+    ('﹣', '﹣'),
+    ('﹨', '﹨'),
+    ('﹪', '﹫'),
+    ('!', '#'),
+    ('%', '*'),
+    (',', '/'),
+    (':', ';'),
+    ('?', '@'),
+    ('[', ']'),
+    ('_', '_'),
+    ('{', '{'),
+    ('}', '}'),
+    ('⦅', '・'),
+    ('𐄀', '𐄂'),
+    ('𐎟', '𐎟'),
+    ('𐏐', '𐏐'),
+    ('𐕯', '𐕯'),
+    ('𐡗', '𐡗'),
+    ('𐤟', '𐤟'),
+    ('𐤿', '𐤿'),
+    ('𐩐', '𐩘'),
+    ('𐩿', '𐩿'),
+    ('𐫰', '𐫶'),
+    ('𐬹', '𐬿'),
+    ('𐮙', '𐮜'),
+    ('\u{10ead}', '\u{10ead}'),
+    ('𐽕', '𐽙'),
+    ('𑁇', '𑁍'),
+    ('𑂻', '𑂼'),
+    ('𑂾', '𑃁'),
+    ('𑅀', '𑅃'),
+    ('𑅴', '𑅵'),
+    ('𑇅', '𑇈'),
+    ('𑇍', '𑇍'),
+    ('𑇛', '𑇛'),
+    ('𑇝', '𑇟'),
+    ('𑈸', '𑈽'),
+    ('𑊩', '𑊩'),
+    ('𑑋', '𑑏'),
+    ('\u{1145a}', '𑑛'),
+    ('𑑝', '𑑝'),
+    ('𑓆', '𑓆'),
+    ('𑗁', '𑗗'),
+    ('𑙁', '𑙃'),
+    ('𑙠', '𑙬'),
+    ('𑜼', '𑜾'),
+    ('𑠻', '𑠻'),
+    ('\u{11944}', '\u{11946}'),
+    ('𑧢', '𑧢'),
+    ('𑨿', '𑩆'),
+    ('𑪚', '𑪜'),
+    ('𑪞', '𑪢'),
+    ('𑱁', '𑱅'),
+    ('𑱰', '𑱱'),
+    ('𑻷', '𑻸'),
+    ('𑿿', '𑿿'),
+    ('𒑰', '𒑴'),
+    ('𖩮', '𖩯'),
+    ('𖫵', '𖫵'),
+    ('𖬷', '𖬻'),
+    ('𖭄', '𖭄'),
+    ('𖺗', '𖺚'),
+    ('𖿢', '𖿢'),
+    ('𛲟', '𛲟'),
+    ('𝪇', '𝪋'),
+    ('𞥞', '𞥟'),
+];
+
+pub const SEPARATOR: &'static [(char, char)] = &[
+    (' ', ' '),
+    ('\u{a0}', '\u{a0}'),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{200a}'),
+    ('\u{2028}', '\u{2029}'),
+    ('\u{202f}', '\u{202f}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACE_SEPARATOR: &'static [(char, char)] = &[
+    (' ', ' '),
+    ('\u{a0}', '\u{a0}'),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{200a}'),
+    ('\u{202f}', '\u{202f}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACING_MARK: &'static [(char, char)] = &[
+    ('ः', 'ः'),
+    ('ऻ', 'ऻ'),
+    ('ा', 'ी'),
+    ('ॉ', 'ौ'),
+    ('ॎ', 'ॏ'),
+    ('ং', 'ঃ'),
+    ('\u{9be}', 'ী'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৌ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ਃ', 'ਃ'),
+    ('ਾ', 'ੀ'),
+    ('ઃ', 'ઃ'),
+    ('ા', 'ી'),
+    ('ૉ', 'ૉ'),
+    ('ો', 'ૌ'),
+    ('ଂ', 'ଃ'),
+    ('\u{b3e}', '\u{b3e}'),
+    ('ୀ', 'ୀ'),
+    ('େ', 'ୈ'),
+    ('ୋ', 'ୌ'),
+    ('\u{b57}', '\u{b57}'),
+    ('\u{bbe}', 'ி'),
+    ('ு', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', 'ௌ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('ఁ', 'ః'),
+    ('ు', 'ౄ'),
+    ('ಂ', 'ಃ'),
+    ('ಾ', 'ಾ'),
+    ('ೀ', 'ೄ'),
+    ('ೇ', 'ೈ'),
+    ('ೊ', 'ೋ'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ം', 'ഃ'),
+    ('\u{d3e}', 'ീ'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൌ'),
+    ('\u{d57}', '\u{d57}'),
+    ('ං', 'ඃ'),
+    ('\u{dcf}', 'ෑ'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('༾', '༿'),
+    ('ཿ', 'ཿ'),
+    ('ါ', 'ာ'),
+    ('ေ', 'ေ'),
+    ('း', 'း'),
+    ('ျ', 'ြ'),
+    ('ၖ', 'ၗ'),
+    ('ၢ', 'ၤ'),
+    ('ၧ', 'ၭ'),
+    ('ႃ', 'ႄ'),
+    ('ႇ', 'ႌ'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', 'ႜ'),
+    ('ា', 'ា'),
+    ('ើ', 'ៅ'),
+    ('ះ', 'ៈ'),
+    ('ᤣ', 'ᤦ'),
+    ('ᤩ', 'ᤫ'),
+    ('ᤰ', 'ᤱ'),
+    ('ᤳ', 'ᤸ'),
+    ('ᨙ', 'ᨚ'),
+    ('ᩕ', 'ᩕ'),
+    ('ᩗ', 'ᩗ'),
+    ('ᩡ', 'ᩡ'),
+    ('ᩣ', 'ᩤ'),
+    ('ᩭ', 'ᩲ'),
+    ('ᬄ', 'ᬄ'),
+    ('\u{1b35}', '\u{1b35}'),
+    ('ᬻ', 'ᬻ'),
+    ('ᬽ', 'ᭁ'),
+    ('ᭃ', '᭄'),
+    ('ᮂ', 'ᮂ'),
+    ('ᮡ', 'ᮡ'),
+    ('ᮦ', 'ᮧ'),
+    ('᮪', '᮪'),
+    ('ᯧ', 'ᯧ'),
+    ('ᯪ', 'ᯬ'),
+    ('ᯮ', 'ᯮ'),
+    ('᯲', '᯳'),
+    ('ᰤ', 'ᰫ'),
+    ('ᰴ', 'ᰵ'),
+    ('᳡', '᳡'),
+    ('᳷', '᳷'),
+    ('\u{302e}', '\u{302f}'),
+    ('ꠣ', 'ꠤ'),
+    ('ꠧ', 'ꠧ'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', 'ꣃ'),
+    ('ꥒ', '꥓'),
+    ('ꦃ', 'ꦃ'),
+    ('ꦴ', 'ꦵ'),
+    ('ꦺ', 'ꦻ'),
+    ('ꦾ', '꧀'),
+    ('ꨯ', 'ꨰ'),
+    ('ꨳ', 'ꨴ'),
+    ('ꩍ', 'ꩍ'),
+    ('ꩻ', 'ꩻ'),
+    ('ꩽ', 'ꩽ'),
+    ('ꫫ', 'ꫫ'),
+    ('ꫮ', 'ꫯ'),
+    ('ꫵ', 'ꫵ'),
+    ('ꯣ', 'ꯤ'),
+    ('ꯦ', 'ꯧ'),
+    ('ꯩ', 'ꯪ'),
+    ('꯬', '꯬'),
+    ('𑀀', '𑀀'),
+    ('𑀂', '𑀂'),
+    ('𑂂', '𑂂'),
+    ('𑂰', '𑂲'),
+    ('𑂷', '𑂸'),
+    ('𑄬', '𑄬'),
+    ('𑅅', '𑅆'),
+    ('𑆂', '𑆂'),
+    ('𑆳', '𑆵'),
+    ('𑆿', '𑇀'),
+    ('\u{111ce}', '\u{111ce}'),
+    ('𑈬', '𑈮'),
+    ('𑈲', '𑈳'),
+    ('𑈵', '𑈵'),
+    ('𑋠', '𑋢'),
+    ('𑌂', '𑌃'),
+    ('\u{1133e}', '𑌿'),
+    ('𑍁', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍢', '𑍣'),
+    ('𑐵', '𑐷'),
+    ('𑑀', '𑑁'),
+    ('𑑅', '𑑅'),
+    ('\u{114b0}', '𑒲'),
+    ('𑒹', '𑒹'),
+    ('𑒻', '𑒾'),
+    ('𑓁', '𑓁'),
+    ('\u{115af}', '𑖱'),
+    ('𑖸', '𑖻'),
+    ('𑖾', '𑖾'),
+    ('𑘰', '𑘲'),
+    ('𑘻', '𑘼'),
+    ('𑘾', '𑘾'),
+    ('𑚬', '𑚬'),
+    ('𑚮', '𑚯'),
+    ('𑚶', '𑚶'),
+    ('𑜠', '𑜡'),
+    ('𑜦', '𑜦'),
+    ('𑠬', '𑠮'),
+    ('𑠸', '𑠸'),
+    ('\u{11930}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193d}', '\u{1193d}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11942}'),
+    ('𑧑', '𑧓'),
+    ('𑧜', '𑧟'),
+    ('𑧤', '𑧤'),
+    ('𑨹', '𑨹'),
+    ('𑩗', '𑩘'),
+    ('𑪗', '𑪗'),
+    ('𑰯', '𑰯'),
+    ('𑰾', '𑰾'),
+    ('𑲩', '𑲩'),
+    ('𑲱', '𑲱'),
+    ('𑲴', '𑲴'),
+    ('𑶊', '𑶎'),
+    ('𑶓', '𑶔'),
+    ('𑶖', '𑶖'),
+    ('𑻵', '𑻶'),
+    ('𖽑', '𖾇'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1d165}', '𝅦'),
+    ('𝅭', '\u{1d172}'),
+];
+
+pub const SYMBOL: &'static [(char, char)] = &[
+    ('$', '$'),
+    ('+', '+'),
+    ('<', '>'),
+    ('^', '^'),
+    ('`', '`'),
+    ('|', '|'),
+    ('~', '~'),
+    ('¢', '¦'),
+    ('¨', '©'),
+    ('¬', '¬'),
+    ('®', '±'),
+    ('´', '´'),
+    ('¸', '¸'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('˂', '˅'),
+    ('˒', '˟'),
+    ('˥', '˫'),
+    ('˭', '˭'),
+    ('˯', '˿'),
+    ('͵', '͵'),
+    ('΄', '΅'),
+    ('϶', '϶'),
+    ('҂', '҂'),
+    ('֍', '֏'),
+    ('؆', '؈'),
+    ('؋', '؋'),
+    ('؎', '؏'),
+    ('۞', '۞'),
+    ('۩', '۩'),
+    ('۽', '۾'),
+    ('߶', '߶'),
+    ('߾', '߿'),
+    ('৲', '৳'),
+    ('৺', '৻'),
+    ('૱', '૱'),
+    ('୰', '୰'),
+    ('௳', '௺'),
+    ('౿', '౿'),
+    ('൏', '൏'),
+    ('൹', '൹'),
+    ('฿', '฿'),
+    ('༁', '༃'),
+    ('༓', '༓'),
+    ('༕', '༗'),
+    ('༚', '༟'),
+    ('༴', '༴'),
+    ('༶', '༶'),
+    ('༸', '༸'),
+    ('྾', '࿅'),
+    ('࿇', '࿌'),
+    ('࿎', '࿏'),
+    ('࿕', '࿘'),
+    ('႞', '႟'),
+    ('᎐', '᎙'),
+    ('᙭', '᙭'),
+    ('៛', '៛'),
+    ('᥀', '᥀'),
+    ('᧞', '᧿'),
+    ('᭡', '᭪'),
+    ('᭴', '᭼'),
+    ('᾽', '᾽'),
+    ('᾿', '῁'),
+    ('῍', '῏'),
+    ('῝', '῟'),
+    ('῭', '`'),
+    ('´', '῾'),
+    ('⁄', '⁄'),
+    ('⁒', '⁒'),
+    ('⁺', '⁼'),
+    ('₊', '₌'),
+    ('₠', '₿'),
+    ('℀', '℁'),
+    ('℃', '℆'),
+    ('℈', '℉'),
+    ('℔', '℔'),
+    ('№', '℘'),
+    ('℞', '℣'),
+    ('℥', '℥'),
+    ('℧', '℧'),
+    ('℩', '℩'),
+    ('℮', '℮'),
+    ('℺', '℻'),
+    ('⅀', '⅄'),
+    ('⅊', '⅍'),
+    ('⅏', '⅏'),
+    ('↊', '↋'),
+    ('←', '⌇'),
+    ('⌌', '⌨'),
+    ('⌫', '␦'),
+    ('⑀', '⑊'),
+    ('⒜', 'ⓩ'),
+    ('─', '❧'),
+    ('➔', '⟄'),
+    ('⟇', '⟥'),
+    ('⟰', '⦂'),
+    ('⦙', '⧗'),
+    ('⧜', '⧻'),
+    ('⧾', '⭳'),
+    ('⭶', '⮕'),
+    ('\u{2b97}', '⯿'),
+    ('⳥', '⳪'),
+    ('\u{2e50}', '\u{2e51}'),
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('⿰', '⿻'),
+    ('〄', '〄'),
+    ('〒', '〓'),
+    ('〠', '〠'),
+    ('〶', '〷'),
+    ('〾', '〿'),
+    ('゛', '゜'),
+    ('㆐', '㆑'),
+    ('㆖', '㆟'),
+    ('㇀', '㇣'),
+    ('㈀', '㈞'),
+    ('㈪', '㉇'),
+    ('㉐', '㉐'),
+    ('㉠', '㉿'),
+    ('㊊', '㊰'),
+    ('㋀', '㏿'),
+    ('䷀', '䷿'),
+    ('꒐', '꓆'),
+    ('꜀', '꜖'),
+    ('꜠', '꜡'),
+    ('꞉', '꞊'),
+    ('꠨', '꠫'),
+    ('꠶', '꠹'),
+    ('꩷', '꩹'),
+    ('꭛', '꭛'),
+    ('\u{ab6a}', '\u{ab6b}'),
+    ('﬩', '﬩'),
+    ('﮲', '﯁'),
+    ('﷼', '﷽'),
+    ('﹢', '﹢'),
+    ('﹤', '﹦'),
+    ('﹩', '﹩'),
+    ('$', '$'),
+    ('+', '+'),
+    ('<', '>'),
+    ('^', '^'),
+    ('`', '`'),
+    ('|', '|'),
+    ('~', '~'),
+    ('¢', '₩'),
+    ('│', '○'),
+    ('', '�'),
+    ('𐄷', '𐄿'),
+    ('𐅹', '𐆉'),
+    ('𐆌', '𐆎'),
+    ('𐆐', '\u{1019c}'),
+    ('𐆠', '𐆠'),
+    ('𐇐', '𐇼'),
+    ('𐡷', '𐡸'),
+    ('𐫈', '𐫈'),
+    ('𑜿', '𑜿'),
+    ('𑿕', '𑿱'),
+    ('𖬼', '𖬿'),
+    ('𖭅', '𖭅'),
+    ('𛲜', '𛲜'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄩', '𝅘𝅥𝅲'),
+    ('𝅪', '𝅬'),
+    ('𝆃', '𝆄'),
+    ('𝆌', '𝆩'),
+    ('𝆮', '𝇨'),
+    ('𝈀', '𝉁'),
+    ('𝉅', '𝉅'),
+    ('𝌀', '𝍖'),
+    ('𝛁', '𝛁'),
+    ('𝛛', '𝛛'),
+    ('𝛻', '𝛻'),
+    ('𝜕', '𝜕'),
+    ('𝜵', '𝜵'),
+    ('𝝏', '𝝏'),
+    ('𝝯', '𝝯'),
+    ('𝞉', '𝞉'),
+    ('𝞩', '𝞩'),
+    ('𝟃', '𝟃'),
+    ('𝠀', '𝧿'),
+    ('𝨷', '𝨺'),
+    ('𝩭', '𝩴'),
+    ('𝩶', '𝪃'),
+    ('𝪅', '𝪆'),
+    ('𞅏', '𞅏'),
+    ('𞋿', '𞋿'),
+    ('𞲬', '𞲬'),
+    ('𞲰', '𞲰'),
+    ('𞴮', '𞴮'),
+    ('𞻰', '𞻱'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+    ('🂠', '🂮'),
+    ('🂱', '🂿'),
+    ('🃁', '🃏'),
+    ('🃑', '🃵'),
+    ('\u{1f10d}', '\u{1f1ad}'),
+    ('🇦', '🈂'),
+    ('🈐', '🈻'),
+    ('🉀', '🉈'),
+    ('🉐', '🉑'),
+    ('🉠', '🉥'),
+    ('🌀', '\u{1f6d7}'),
+    ('🛠', '🛬'),
+    ('🛰', '\u{1f6fc}'),
+    ('🜀', '🝳'),
+    ('🞀', '🟘'),
+    ('🟠', '🟫'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('🤀', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🩓'),
+    ('🩠', '🩭'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+];
+
+pub const TITLECASE_LETTER: &'static [(char, char)] = &[
+    ('Dž', 'Dž'),
+    ('Lj', 'Lj'),
+    ('Nj', 'Nj'),
+    ('Dz', 'Dz'),
+    ('ᾈ', 'ᾏ'),
+    ('ᾘ', 'ᾟ'),
+    ('ᾨ', 'ᾯ'),
+    ('ᾼ', 'ᾼ'),
+    ('ῌ', 'ῌ'),
+    ('ῼ', 'ῼ'),
+];
+
+pub const UNASSIGNED: &'static [(char, char)] = &[
+    ('\u{378}', '\u{379}'),
+    ('\u{380}', '\u{383}'),
+    ('\u{38b}', '\u{38b}'),
+    ('\u{38d}', '\u{38d}'),
+    ('\u{3a2}', '\u{3a2}'),
+    ('\u{530}', '\u{530}'),
+    ('\u{557}', '\u{558}'),
+    ('\u{58b}', '\u{58c}'),
+    ('\u{590}', '\u{590}'),
+    ('\u{5c8}', '\u{5cf}'),
+    ('\u{5eb}', '\u{5ee}'),
+    ('\u{5f5}', '\u{5ff}'),
+    ('\u{61d}', '\u{61d}'),
+    ('\u{70e}', '\u{70e}'),
+    ('\u{74b}', '\u{74c}'),
+    ('\u{7b2}', '\u{7bf}'),
+    ('\u{7fb}', '\u{7fc}'),
+    ('\u{82e}', '\u{82f}'),
+    ('\u{83f}', '\u{83f}'),
+    ('\u{85c}', '\u{85d}'),
+    ('\u{85f}', '\u{85f}'),
+    ('\u{86b}', '\u{89f}'),
+    ('\u{8b5}', '\u{8b5}'),
+    ('\u{8c8}', '\u{8d2}'),
+    ('\u{984}', '\u{984}'),
+    ('\u{98d}', '\u{98e}'),
+    ('\u{991}', '\u{992}'),
+    ('\u{9a9}', '\u{9a9}'),
+    ('\u{9b1}', '\u{9b1}'),
+    ('\u{9b3}', '\u{9b5}'),
+    ('\u{9ba}', '\u{9bb}'),
+    ('\u{9c5}', '\u{9c6}'),
+    ('\u{9c9}', '\u{9ca}'),
+    ('\u{9cf}', '\u{9d6}'),
+    ('\u{9d8}', '\u{9db}'),
+    ('\u{9de}', '\u{9de}'),
+    ('\u{9e4}', '\u{9e5}'),
+    ('\u{9ff}', '\u{a00}'),
+    ('\u{a04}', '\u{a04}'),
+    ('\u{a0b}', '\u{a0e}'),
+    ('\u{a11}', '\u{a12}'),
+    ('\u{a29}', '\u{a29}'),
+    ('\u{a31}', '\u{a31}'),
+    ('\u{a34}', '\u{a34}'),
+    ('\u{a37}', '\u{a37}'),
+    ('\u{a3a}', '\u{a3b}'),
+    ('\u{a3d}', '\u{a3d}'),
+    ('\u{a43}', '\u{a46}'),
+    ('\u{a49}', '\u{a4a}'),
+    ('\u{a4e}', '\u{a50}'),
+    ('\u{a52}', '\u{a58}'),
+    ('\u{a5d}', '\u{a5d}'),
+    ('\u{a5f}', '\u{a65}'),
+    ('\u{a77}', '\u{a80}'),
+    ('\u{a84}', '\u{a84}'),
+    ('\u{a8e}', '\u{a8e}'),
+    ('\u{a92}', '\u{a92}'),
+    ('\u{aa9}', '\u{aa9}'),
+    ('\u{ab1}', '\u{ab1}'),
+    ('\u{ab4}', '\u{ab4}'),
+    ('\u{aba}', '\u{abb}'),
+    ('\u{ac6}', '\u{ac6}'),
+    ('\u{aca}', '\u{aca}'),
+    ('\u{ace}', '\u{acf}'),
+    ('\u{ad1}', '\u{adf}'),
+    ('\u{ae4}', '\u{ae5}'),
+    ('\u{af2}', '\u{af8}'),
+    ('\u{b00}', '\u{b00}'),
+    ('\u{b04}', '\u{b04}'),
+    ('\u{b0d}', '\u{b0e}'),
+    ('\u{b11}', '\u{b12}'),
+    ('\u{b29}', '\u{b29}'),
+    ('\u{b31}', '\u{b31}'),
+    ('\u{b34}', '\u{b34}'),
+    ('\u{b3a}', '\u{b3b}'),
+    ('\u{b45}', '\u{b46}'),
+    ('\u{b49}', '\u{b4a}'),
+    ('\u{b4e}', '\u{b54}'),
+    ('\u{b58}', '\u{b5b}'),
+    ('\u{b5e}', '\u{b5e}'),
+    ('\u{b64}', '\u{b65}'),
+    ('\u{b78}', '\u{b81}'),
+    ('\u{b84}', '\u{b84}'),
+    ('\u{b8b}', '\u{b8d}'),
+    ('\u{b91}', '\u{b91}'),
+    ('\u{b96}', '\u{b98}'),
+    ('\u{b9b}', '\u{b9b}'),
+    ('\u{b9d}', '\u{b9d}'),
+    ('\u{ba0}', '\u{ba2}'),
+    ('\u{ba5}', '\u{ba7}'),
+    ('\u{bab}', '\u{bad}'),
+    ('\u{bba}', '\u{bbd}'),
+    ('\u{bc3}', '\u{bc5}'),
+    ('\u{bc9}', '\u{bc9}'),
+    ('\u{bce}', '\u{bcf}'),
+    ('\u{bd1}', '\u{bd6}'),
+    ('\u{bd8}', '\u{be5}'),
+    ('\u{bfb}', '\u{bff}'),
+    ('\u{c0d}', '\u{c0d}'),
+    ('\u{c11}', '\u{c11}'),
+    ('\u{c29}', '\u{c29}'),
+    ('\u{c3a}', '\u{c3c}'),
+    ('\u{c45}', '\u{c45}'),
+    ('\u{c49}', '\u{c49}'),
+    ('\u{c4e}', '\u{c54}'),
+    ('\u{c57}', '\u{c57}'),
+    ('\u{c5b}', '\u{c5f}'),
+    ('\u{c64}', '\u{c65}'),
+    ('\u{c70}', '\u{c76}'),
+    ('\u{c8d}', '\u{c8d}'),
+    ('\u{c91}', '\u{c91}'),
+    ('\u{ca9}', '\u{ca9}'),
+    ('\u{cb4}', '\u{cb4}'),
+    ('\u{cba}', '\u{cbb}'),
+    ('\u{cc5}', '\u{cc5}'),
+    ('\u{cc9}', '\u{cc9}'),
+    ('\u{cce}', '\u{cd4}'),
+    ('\u{cd7}', '\u{cdd}'),
+    ('\u{cdf}', '\u{cdf}'),
+    ('\u{ce4}', '\u{ce5}'),
+    ('\u{cf0}', '\u{cf0}'),
+    ('\u{cf3}', '\u{cff}'),
+    ('\u{d0d}', '\u{d0d}'),
+    ('\u{d11}', '\u{d11}'),
+    ('\u{d45}', '\u{d45}'),
+    ('\u{d49}', '\u{d49}'),
+    ('\u{d50}', '\u{d53}'),
+    ('\u{d64}', '\u{d65}'),
+    ('\u{d80}', '\u{d80}'),
+    ('\u{d84}', '\u{d84}'),
+    ('\u{d97}', '\u{d99}'),
+    ('\u{db2}', '\u{db2}'),
+    ('\u{dbc}', '\u{dbc}'),
+    ('\u{dbe}', '\u{dbf}'),
+    ('\u{dc7}', '\u{dc9}'),
+    ('\u{dcb}', '\u{dce}'),
+    ('\u{dd5}', '\u{dd5}'),
+    ('\u{dd7}', '\u{dd7}'),
+    ('\u{de0}', '\u{de5}'),
+    ('\u{df0}', '\u{df1}'),
+    ('\u{df5}', '\u{e00}'),
+    ('\u{e3b}', '\u{e3e}'),
+    ('\u{e5c}', '\u{e80}'),
+    ('\u{e83}', '\u{e83}'),
+    ('\u{e85}', '\u{e85}'),
+    ('\u{e8b}', '\u{e8b}'),
+    ('\u{ea4}', '\u{ea4}'),
+    ('\u{ea6}', '\u{ea6}'),
+    ('\u{ebe}', '\u{ebf}'),
+    ('\u{ec5}', '\u{ec5}'),
+    ('\u{ec7}', '\u{ec7}'),
+    ('\u{ece}', '\u{ecf}'),
+    ('\u{eda}', '\u{edb}'),
+    ('\u{ee0}', '\u{eff}'),
+    ('\u{f48}', '\u{f48}'),
+    ('\u{f6d}', '\u{f70}'),
+    ('\u{f98}', '\u{f98}'),
+    ('\u{fbd}', '\u{fbd}'),
+    ('\u{fcd}', '\u{fcd}'),
+    ('\u{fdb}', '\u{fff}'),
+    ('\u{10c6}', '\u{10c6}'),
+    ('\u{10c8}', '\u{10cc}'),
+    ('\u{10ce}', '\u{10cf}'),
+    ('\u{1249}', '\u{1249}'),
+    ('\u{124e}', '\u{124f}'),
+    ('\u{1257}', '\u{1257}'),
+    ('\u{1259}', '\u{1259}'),
+    ('\u{125e}', '\u{125f}'),
+    ('\u{1289}', '\u{1289}'),
+    ('\u{128e}', '\u{128f}'),
+    ('\u{12b1}', '\u{12b1}'),
+    ('\u{12b6}', '\u{12b7}'),
+    ('\u{12bf}', '\u{12bf}'),
+    ('\u{12c1}', '\u{12c1}'),
+    ('\u{12c6}', '\u{12c7}'),
+    ('\u{12d7}', '\u{12d7}'),
+    ('\u{1311}', '\u{1311}'),
+    ('\u{1316}', '\u{1317}'),
+    ('\u{135b}', '\u{135c}'),
+    ('\u{137d}', '\u{137f}'),
+    ('\u{139a}', '\u{139f}'),
+    ('\u{13f6}', '\u{13f7}'),
+    ('\u{13fe}', '\u{13ff}'),
+    ('\u{169d}', '\u{169f}'),
+    ('\u{16f9}', '\u{16ff}'),
+    ('\u{170d}', '\u{170d}'),
+    ('\u{1715}', '\u{171f}'),
+    ('\u{1737}', '\u{173f}'),
+    ('\u{1754}', '\u{175f}'),
+    ('\u{176d}', '\u{176d}'),
+    ('\u{1771}', '\u{1771}'),
+    ('\u{1774}', '\u{177f}'),
+    ('\u{17de}', '\u{17df}'),
+    ('\u{17ea}', '\u{17ef}'),
+    ('\u{17fa}', '\u{17ff}'),
+    ('\u{180f}', '\u{180f}'),
+    ('\u{181a}', '\u{181f}'),
+    ('\u{1879}', '\u{187f}'),
+    ('\u{18ab}', '\u{18af}'),
+    ('\u{18f6}', '\u{18ff}'),
+    ('\u{191f}', '\u{191f}'),
+    ('\u{192c}', '\u{192f}'),
+    ('\u{193c}', '\u{193f}'),
+    ('\u{1941}', '\u{1943}'),
+    ('\u{196e}', '\u{196f}'),
+    ('\u{1975}', '\u{197f}'),
+    ('\u{19ac}', '\u{19af}'),
+    ('\u{19ca}', '\u{19cf}'),
+    ('\u{19db}', '\u{19dd}'),
+    ('\u{1a1c}', '\u{1a1d}'),
+    ('\u{1a5f}', '\u{1a5f}'),
+    ('\u{1a7d}', '\u{1a7e}'),
+    ('\u{1a8a}', '\u{1a8f}'),
+    ('\u{1a9a}', '\u{1a9f}'),
+    ('\u{1aae}', '\u{1aaf}'),
+    ('\u{1ac1}', '\u{1aff}'),
+    ('\u{1b4c}', '\u{1b4f}'),
+    ('\u{1b7d}', '\u{1b7f}'),
+    ('\u{1bf4}', '\u{1bfb}'),
+    ('\u{1c38}', '\u{1c3a}'),
+    ('\u{1c4a}', '\u{1c4c}'),
+    ('\u{1c89}', '\u{1c8f}'),
+    ('\u{1cbb}', '\u{1cbc}'),
+    ('\u{1cc8}', '\u{1ccf}'),
+    ('\u{1cfb}', '\u{1cff}'),
+    ('\u{1dfa}', '\u{1dfa}'),
+    ('\u{1f16}', '\u{1f17}'),
+    ('\u{1f1e}', '\u{1f1f}'),
+    ('\u{1f46}', '\u{1f47}'),
+    ('\u{1f4e}', '\u{1f4f}'),
+    ('\u{1f58}', '\u{1f58}'),
+    ('\u{1f5a}', '\u{1f5a}'),
+    ('\u{1f5c}', '\u{1f5c}'),
+    ('\u{1f5e}', '\u{1f5e}'),
+    ('\u{1f7e}', '\u{1f7f}'),
+    ('\u{1fb5}', '\u{1fb5}'),
+    ('\u{1fc5}', '\u{1fc5}'),
+    ('\u{1fd4}', '\u{1fd5}'),
+    ('\u{1fdc}', '\u{1fdc}'),
+    ('\u{1ff0}', '\u{1ff1}'),
+    ('\u{1ff5}', '\u{1ff5}'),
+    ('\u{1fff}', '\u{1fff}'),
+    ('\u{2065}', '\u{2065}'),
+    ('\u{2072}', '\u{2073}'),
+    ('\u{208f}', '\u{208f}'),
+    ('\u{209d}', '\u{209f}'),
+    ('\u{20c0}', '\u{20cf}'),
+    ('\u{20f1}', '\u{20ff}'),
+    ('\u{218c}', '\u{218f}'),
+    ('\u{2427}', '\u{243f}'),
+    ('\u{244b}', '\u{245f}'),
+    ('\u{2b74}', '\u{2b75}'),
+    ('\u{2b96}', '\u{2b96}'),
+    ('\u{2c2f}', '\u{2c2f}'),
+    ('\u{2c5f}', '\u{2c5f}'),
+    ('\u{2cf4}', '\u{2cf8}'),
+    ('\u{2d26}', '\u{2d26}'),
+    ('\u{2d28}', '\u{2d2c}'),
+    ('\u{2d2e}', '\u{2d2f}'),
+    ('\u{2d68}', '\u{2d6e}'),
+    ('\u{2d71}', '\u{2d7e}'),
+    ('\u{2d97}', '\u{2d9f}'),
+    ('\u{2da7}', '\u{2da7}'),
+    ('\u{2daf}', '\u{2daf}'),
+    ('\u{2db7}', '\u{2db7}'),
+    ('\u{2dbf}', '\u{2dbf}'),
+    ('\u{2dc7}', '\u{2dc7}'),
+    ('\u{2dcf}', '\u{2dcf}'),
+    ('\u{2dd7}', '\u{2dd7}'),
+    ('\u{2ddf}', '\u{2ddf}'),
+    ('\u{2e53}', '\u{2e7f}'),
+    ('\u{2e9a}', '\u{2e9a}'),
+    ('\u{2ef4}', '\u{2eff}'),
+    ('\u{2fd6}', '\u{2fef}'),
+    ('\u{2ffc}', '\u{2fff}'),
+    ('\u{3040}', '\u{3040}'),
+    ('\u{3097}', '\u{3098}'),
+    ('\u{3100}', '\u{3104}'),
+    ('\u{3130}', '\u{3130}'),
+    ('\u{318f}', '\u{318f}'),
+    ('\u{31e4}', '\u{31ef}'),
+    ('\u{321f}', '\u{321f}'),
+    ('\u{9ffd}', '\u{9fff}'),
+    ('\u{a48d}', '\u{a48f}'),
+    ('\u{a4c7}', '\u{a4cf}'),
+    ('\u{a62c}', '\u{a63f}'),
+    ('\u{a6f8}', '\u{a6ff}'),
+    ('\u{a7c0}', '\u{a7c1}'),
+    ('\u{a7cb}', '\u{a7f4}'),
+    ('\u{a82d}', '\u{a82f}'),
+    ('\u{a83a}', '\u{a83f}'),
+    ('\u{a878}', '\u{a87f}'),
+    ('\u{a8c6}', '\u{a8cd}'),
+    ('\u{a8da}', '\u{a8df}'),
+    ('\u{a954}', '\u{a95e}'),
+    ('\u{a97d}', '\u{a97f}'),
+    ('\u{a9ce}', '\u{a9ce}'),
+    ('\u{a9da}', '\u{a9dd}'),
+    ('\u{a9ff}', '\u{a9ff}'),
+    ('\u{aa37}', '\u{aa3f}'),
+    ('\u{aa4e}', '\u{aa4f}'),
+    ('\u{aa5a}', '\u{aa5b}'),
+    ('\u{aac3}', '\u{aada}'),
+    ('\u{aaf7}', '\u{ab00}'),
+    ('\u{ab07}', '\u{ab08}'),
+    ('\u{ab0f}', '\u{ab10}'),
+    ('\u{ab17}', '\u{ab1f}'),
+    ('\u{ab27}', '\u{ab27}'),
+    ('\u{ab2f}', '\u{ab2f}'),
+    ('\u{ab6c}', '\u{ab6f}'),
+    ('\u{abee}', '\u{abef}'),
+    ('\u{abfa}', '\u{abff}'),
+    ('\u{d7a4}', '\u{d7af}'),
+    ('\u{d7c7}', '\u{d7ca}'),
+    ('\u{d7fc}', '\u{d7ff}'),
+    ('\u{fa6e}', '\u{fa6f}'),
+    ('\u{fada}', '\u{faff}'),
+    ('\u{fb07}', '\u{fb12}'),
+    ('\u{fb18}', '\u{fb1c}'),
+    ('\u{fb37}', '\u{fb37}'),
+    ('\u{fb3d}', '\u{fb3d}'),
+    ('\u{fb3f}', '\u{fb3f}'),
+    ('\u{fb42}', '\u{fb42}'),
+    ('\u{fb45}', '\u{fb45}'),
+    ('\u{fbc2}', '\u{fbd2}'),
+    ('\u{fd40}', '\u{fd4f}'),
+    ('\u{fd90}', '\u{fd91}'),
+    ('\u{fdc8}', '\u{fdef}'),
+    ('\u{fdfe}', '\u{fdff}'),
+    ('\u{fe1a}', '\u{fe1f}'),
+    ('\u{fe53}', '\u{fe53}'),
+    ('\u{fe67}', '\u{fe67}'),
+    ('\u{fe6c}', '\u{fe6f}'),
+    ('\u{fe75}', '\u{fe75}'),
+    ('\u{fefd}', '\u{fefe}'),
+    ('\u{ff00}', '\u{ff00}'),
+    ('\u{ffbf}', '\u{ffc1}'),
+    ('\u{ffc8}', '\u{ffc9}'),
+    ('\u{ffd0}', '\u{ffd1}'),
+    ('\u{ffd8}', '\u{ffd9}'),
+    ('\u{ffdd}', '\u{ffdf}'),
+    ('\u{ffe7}', '\u{ffe7}'),
+    ('\u{ffef}', '\u{fff8}'),
+    ('\u{fffe}', '\u{ffff}'),
+    ('\u{1000c}', '\u{1000c}'),
+    ('\u{10027}', '\u{10027}'),
+    ('\u{1003b}', '\u{1003b}'),
+    ('\u{1003e}', '\u{1003e}'),
+    ('\u{1004e}', '\u{1004f}'),
+    ('\u{1005e}', '\u{1007f}'),
+    ('\u{100fb}', '\u{100ff}'),
+    ('\u{10103}', '\u{10106}'),
+    ('\u{10134}', '\u{10136}'),
+    ('\u{1018f}', '\u{1018f}'),
+    ('\u{1019d}', '\u{1019f}'),
+    ('\u{101a1}', '\u{101cf}'),
+    ('\u{101fe}', '\u{1027f}'),
+    ('\u{1029d}', '\u{1029f}'),
+    ('\u{102d1}', '\u{102df}'),
+    ('\u{102fc}', '\u{102ff}'),
+    ('\u{10324}', '\u{1032c}'),
+    ('\u{1034b}', '\u{1034f}'),
+    ('\u{1037b}', '\u{1037f}'),
+    ('\u{1039e}', '\u{1039e}'),
+    ('\u{103c4}', '\u{103c7}'),
+    ('\u{103d6}', '\u{103ff}'),
+    ('\u{1049e}', '\u{1049f}'),
+    ('\u{104aa}', '\u{104af}'),
+    ('\u{104d4}', '\u{104d7}'),
+    ('\u{104fc}', '\u{104ff}'),
+    ('\u{10528}', '\u{1052f}'),
+    ('\u{10564}', '\u{1056e}'),
+    ('\u{10570}', '\u{105ff}'),
+    ('\u{10737}', '\u{1073f}'),
+    ('\u{10756}', '\u{1075f}'),
+    ('\u{10768}', '\u{107ff}'),
+    ('\u{10806}', '\u{10807}'),
+    ('\u{10809}', '\u{10809}'),
+    ('\u{10836}', '\u{10836}'),
+    ('\u{10839}', '\u{1083b}'),
+    ('\u{1083d}', '\u{1083e}'),
+    ('\u{10856}', '\u{10856}'),
+    ('\u{1089f}', '\u{108a6}'),
+    ('\u{108b0}', '\u{108df}'),
+    ('\u{108f3}', '\u{108f3}'),
+    ('\u{108f6}', '\u{108fa}'),
+    ('\u{1091c}', '\u{1091e}'),
+    ('\u{1093a}', '\u{1093e}'),
+    ('\u{10940}', '\u{1097f}'),
+    ('\u{109b8}', '\u{109bb}'),
+    ('\u{109d0}', '\u{109d1}'),
+    ('\u{10a04}', '\u{10a04}'),
+    ('\u{10a07}', '\u{10a0b}'),
+    ('\u{10a14}', '\u{10a14}'),
+    ('\u{10a18}', '\u{10a18}'),
+    ('\u{10a36}', '\u{10a37}'),
+    ('\u{10a3b}', '\u{10a3e}'),
+    ('\u{10a49}', '\u{10a4f}'),
+    ('\u{10a59}', '\u{10a5f}'),
+    ('\u{10aa0}', '\u{10abf}'),
+    ('\u{10ae7}', '\u{10aea}'),
+    ('\u{10af7}', '\u{10aff}'),
+    ('\u{10b36}', '\u{10b38}'),
+    ('\u{10b56}', '\u{10b57}'),
+    ('\u{10b73}', '\u{10b77}'),
+    ('\u{10b92}', '\u{10b98}'),
+    ('\u{10b9d}', '\u{10ba8}'),
+    ('\u{10bb0}', '\u{10bff}'),
+    ('\u{10c49}', '\u{10c7f}'),
+    ('\u{10cb3}', '\u{10cbf}'),
+    ('\u{10cf3}', '\u{10cf9}'),
+    ('\u{10d28}', '\u{10d2f}'),
+    ('\u{10d3a}', '\u{10e5f}'),
+    ('\u{10e7f}', '\u{10e7f}'),
+    ('\u{10eaa}', '\u{10eaa}'),
+    ('\u{10eae}', '\u{10eaf}'),
+    ('\u{10eb2}', '\u{10eff}'),
+    ('\u{10f28}', '\u{10f2f}'),
+    ('\u{10f5a}', '\u{10faf}'),
+    ('\u{10fcc}', '\u{10fdf}'),
+    ('\u{10ff7}', '\u{10fff}'),
+    ('\u{1104e}', '\u{11051}'),
+    ('\u{11070}', '\u{1107e}'),
+    ('\u{110c2}', '\u{110cc}'),
+    ('\u{110ce}', '\u{110cf}'),
+    ('\u{110e9}', '\u{110ef}'),
+    ('\u{110fa}', '\u{110ff}'),
+    ('\u{11135}', '\u{11135}'),
+    ('\u{11148}', '\u{1114f}'),
+    ('\u{11177}', '\u{1117f}'),
+    ('\u{111e0}', '\u{111e0}'),
+    ('\u{111f5}', '\u{111ff}'),
+    ('\u{11212}', '\u{11212}'),
+    ('\u{1123f}', '\u{1127f}'),
+    ('\u{11287}', '\u{11287}'),
+    ('\u{11289}', '\u{11289}'),
+    ('\u{1128e}', '\u{1128e}'),
+    ('\u{1129e}', '\u{1129e}'),
+    ('\u{112aa}', '\u{112af}'),
+    ('\u{112eb}', '\u{112ef}'),
+    ('\u{112fa}', '\u{112ff}'),
+    ('\u{11304}', '\u{11304}'),
+    ('\u{1130d}', '\u{1130e}'),
+    ('\u{11311}', '\u{11312}'),
+    ('\u{11329}', '\u{11329}'),
+    ('\u{11331}', '\u{11331}'),
+    ('\u{11334}', '\u{11334}'),
+    ('\u{1133a}', '\u{1133a}'),
+    ('\u{11345}', '\u{11346}'),
+    ('\u{11349}', '\u{1134a}'),
+    ('\u{1134e}', '\u{1134f}'),
+    ('\u{11351}', '\u{11356}'),
+    ('\u{11358}', '\u{1135c}'),
+    ('\u{11364}', '\u{11365}'),
+    ('\u{1136d}', '\u{1136f}'),
+    ('\u{11375}', '\u{113ff}'),
+    ('\u{1145c}', '\u{1145c}'),
+    ('\u{11462}', '\u{1147f}'),
+    ('\u{114c8}', '\u{114cf}'),
+    ('\u{114da}', '\u{1157f}'),
+    ('\u{115b6}', '\u{115b7}'),
+    ('\u{115de}', '\u{115ff}'),
+    ('\u{11645}', '\u{1164f}'),
+    ('\u{1165a}', '\u{1165f}'),
+    ('\u{1166d}', '\u{1167f}'),
+    ('\u{116b9}', '\u{116bf}'),
+    ('\u{116ca}', '\u{116ff}'),
+    ('\u{1171b}', '\u{1171c}'),
+    ('\u{1172c}', '\u{1172f}'),
+    ('\u{11740}', '\u{117ff}'),
+    ('\u{1183c}', '\u{1189f}'),
+    ('\u{118f3}', '\u{118fe}'),
+    ('\u{11907}', '\u{11908}'),
+    ('\u{1190a}', '\u{1190b}'),
+    ('\u{11914}', '\u{11914}'),
+    ('\u{11917}', '\u{11917}'),
+    ('\u{11936}', '\u{11936}'),
+    ('\u{11939}', '\u{1193a}'),
+    ('\u{11947}', '\u{1194f}'),
+    ('\u{1195a}', '\u{1199f}'),
+    ('\u{119a8}', '\u{119a9}'),
+    ('\u{119d8}', '\u{119d9}'),
+    ('\u{119e5}', '\u{119ff}'),
+    ('\u{11a48}', '\u{11a4f}'),
+    ('\u{11aa3}', '\u{11abf}'),
+    ('\u{11af9}', '\u{11bff}'),
+    ('\u{11c09}', '\u{11c09}'),
+    ('\u{11c37}', '\u{11c37}'),
+    ('\u{11c46}', '\u{11c4f}'),
+    ('\u{11c6d}', '\u{11c6f}'),
+    ('\u{11c90}', '\u{11c91}'),
+    ('\u{11ca8}', '\u{11ca8}'),
+    ('\u{11cb7}', '\u{11cff}'),
+    ('\u{11d07}', '\u{11d07}'),
+    ('\u{11d0a}', '\u{11d0a}'),
+    ('\u{11d37}', '\u{11d39}'),
+    ('\u{11d3b}', '\u{11d3b}'),
+    ('\u{11d3e}', '\u{11d3e}'),
+    ('\u{11d48}', '\u{11d4f}'),
+    ('\u{11d5a}', '\u{11d5f}'),
+    ('\u{11d66}', '\u{11d66}'),
+    ('\u{11d69}', '\u{11d69}'),
+    ('\u{11d8f}', '\u{11d8f}'),
+    ('\u{11d92}', '\u{11d92}'),
+    ('\u{11d99}', '\u{11d9f}'),
+    ('\u{11daa}', '\u{11edf}'),
+    ('\u{11ef9}', '\u{11faf}'),
+    ('\u{11fb1}', '\u{11fbf}'),
+    ('\u{11ff2}', '\u{11ffe}'),
+    ('\u{1239a}', '\u{123ff}'),
+    ('\u{1246f}', '\u{1246f}'),
+    ('\u{12475}', '\u{1247f}'),
+    ('\u{12544}', '\u{12fff}'),
+    ('\u{1342f}', '\u{1342f}'),
+    ('\u{13439}', '\u{143ff}'),
+    ('\u{14647}', '\u{167ff}'),
+    ('\u{16a39}', '\u{16a3f}'),
+    ('\u{16a5f}', '\u{16a5f}'),
+    ('\u{16a6a}', '\u{16a6d}'),
+    ('\u{16a70}', '\u{16acf}'),
+    ('\u{16aee}', '\u{16aef}'),
+    ('\u{16af6}', '\u{16aff}'),
+    ('\u{16b46}', '\u{16b4f}'),
+    ('\u{16b5a}', '\u{16b5a}'),
+    ('\u{16b62}', '\u{16b62}'),
+    ('\u{16b78}', '\u{16b7c}'),
+    ('\u{16b90}', '\u{16e3f}'),
+    ('\u{16e9b}', '\u{16eff}'),
+    ('\u{16f4b}', '\u{16f4e}'),
+    ('\u{16f88}', '\u{16f8e}'),
+    ('\u{16fa0}', '\u{16fdf}'),
+    ('\u{16fe5}', '\u{16fef}'),
+    ('\u{16ff2}', '\u{16fff}'),
+    ('\u{187f8}', '\u{187ff}'),
+    ('\u{18cd6}', '\u{18cff}'),
+    ('\u{18d09}', '\u{1afff}'),
+    ('\u{1b11f}', '\u{1b14f}'),
+    ('\u{1b153}', '\u{1b163}'),
+    ('\u{1b168}', '\u{1b16f}'),
+    ('\u{1b2fc}', '\u{1bbff}'),
+    ('\u{1bc6b}', '\u{1bc6f}'),
+    ('\u{1bc7d}', '\u{1bc7f}'),
+    ('\u{1bc89}', '\u{1bc8f}'),
+    ('\u{1bc9a}', '\u{1bc9b}'),
+    ('\u{1bca4}', '\u{1cfff}'),
+    ('\u{1d0f6}', '\u{1d0ff}'),
+    ('\u{1d127}', '\u{1d128}'),
+    ('\u{1d1e9}', '\u{1d1ff}'),
+    ('\u{1d246}', '\u{1d2df}'),
+    ('\u{1d2f4}', '\u{1d2ff}'),
+    ('\u{1d357}', '\u{1d35f}'),
+    ('\u{1d379}', '\u{1d3ff}'),
+    ('\u{1d455}', '\u{1d455}'),
+    ('\u{1d49d}', '\u{1d49d}'),
+    ('\u{1d4a0}', '\u{1d4a1}'),
+    ('\u{1d4a3}', '\u{1d4a4}'),
+    ('\u{1d4a7}', '\u{1d4a8}'),
+    ('\u{1d4ad}', '\u{1d4ad}'),
+    ('\u{1d4ba}', '\u{1d4ba}'),
+    ('\u{1d4bc}', '\u{1d4bc}'),
+    ('\u{1d4c4}', '\u{1d4c4}'),
+    ('\u{1d506}', '\u{1d506}'),
+    ('\u{1d50b}', '\u{1d50c}'),
+    ('\u{1d515}', '\u{1d515}'),
+    ('\u{1d51d}', '\u{1d51d}'),
+    ('\u{1d53a}', '\u{1d53a}'),
+    ('\u{1d53f}', '\u{1d53f}'),
+    ('\u{1d545}', '\u{1d545}'),
+    ('\u{1d547}', '\u{1d549}'),
+    ('\u{1d551}', '\u{1d551}'),
+    ('\u{1d6a6}', '\u{1d6a7}'),
+    ('\u{1d7cc}', '\u{1d7cd}'),
+    ('\u{1da8c}', '\u{1da9a}'),
+    ('\u{1daa0}', '\u{1daa0}'),
+    ('\u{1dab0}', '\u{1dfff}'),
+    ('\u{1e007}', '\u{1e007}'),
+    ('\u{1e019}', '\u{1e01a}'),
+    ('\u{1e022}', '\u{1e022}'),
+    ('\u{1e025}', '\u{1e025}'),
+    ('\u{1e02b}', '\u{1e0ff}'),
+    ('\u{1e12d}', '\u{1e12f}'),
+    ('\u{1e13e}', '\u{1e13f}'),
+    ('\u{1e14a}', '\u{1e14d}'),
+    ('\u{1e150}', '\u{1e2bf}'),
+    ('\u{1e2fa}', '\u{1e2fe}'),
+    ('\u{1e300}', '\u{1e7ff}'),
+    ('\u{1e8c5}', '\u{1e8c6}'),
+    ('\u{1e8d7}', '\u{1e8ff}'),
+    ('\u{1e94c}', '\u{1e94f}'),
+    ('\u{1e95a}', '\u{1e95d}'),
+    ('\u{1e960}', '\u{1ec70}'),
+    ('\u{1ecb5}', '\u{1ed00}'),
+    ('\u{1ed3e}', '\u{1edff}'),
+    ('\u{1ee04}', '\u{1ee04}'),
+    ('\u{1ee20}', '\u{1ee20}'),
+    ('\u{1ee23}', '\u{1ee23}'),
+    ('\u{1ee25}', '\u{1ee26}'),
+    ('\u{1ee28}', '\u{1ee28}'),
+    ('\u{1ee33}', '\u{1ee33}'),
+    ('\u{1ee38}', '\u{1ee38}'),
+    ('\u{1ee3a}', '\u{1ee3a}'),
+    ('\u{1ee3c}', '\u{1ee41}'),
+    ('\u{1ee43}', '\u{1ee46}'),
+    ('\u{1ee48}', '\u{1ee48}'),
+    ('\u{1ee4a}', '\u{1ee4a}'),
+    ('\u{1ee4c}', '\u{1ee4c}'),
+    ('\u{1ee50}', '\u{1ee50}'),
+    ('\u{1ee53}', '\u{1ee53}'),
+    ('\u{1ee55}', '\u{1ee56}'),
+    ('\u{1ee58}', '\u{1ee58}'),
+    ('\u{1ee5a}', '\u{1ee5a}'),
+    ('\u{1ee5c}', '\u{1ee5c}'),
+    ('\u{1ee5e}', '\u{1ee5e}'),
+    ('\u{1ee60}', '\u{1ee60}'),
+    ('\u{1ee63}', '\u{1ee63}'),
+    ('\u{1ee65}', '\u{1ee66}'),
+    ('\u{1ee6b}', '\u{1ee6b}'),
+    ('\u{1ee73}', '\u{1ee73}'),
+    ('\u{1ee78}', '\u{1ee78}'),
+    ('\u{1ee7d}', '\u{1ee7d}'),
+    ('\u{1ee7f}', '\u{1ee7f}'),
+    ('\u{1ee8a}', '\u{1ee8a}'),
+    ('\u{1ee9c}', '\u{1eea0}'),
+    ('\u{1eea4}', '\u{1eea4}'),
+    ('\u{1eeaa}', '\u{1eeaa}'),
+    ('\u{1eebc}', '\u{1eeef}'),
+    ('\u{1eef2}', '\u{1efff}'),
+    ('\u{1f02c}', '\u{1f02f}'),
+    ('\u{1f094}', '\u{1f09f}'),
+    ('\u{1f0af}', '\u{1f0b0}'),
+    ('\u{1f0c0}', '\u{1f0c0}'),
+    ('\u{1f0d0}', '\u{1f0d0}'),
+    ('\u{1f0f6}', '\u{1f0ff}'),
+    ('\u{1f1ae}', '\u{1f1e5}'),
+    ('\u{1f203}', '\u{1f20f}'),
+    ('\u{1f23c}', '\u{1f23f}'),
+    ('\u{1f249}', '\u{1f24f}'),
+    ('\u{1f252}', '\u{1f25f}'),
+    ('\u{1f266}', '\u{1f2ff}'),
+    ('\u{1f6d8}', '\u{1f6df}'),
+    ('\u{1f6ed}', '\u{1f6ef}'),
+    ('\u{1f6fd}', '\u{1f6ff}'),
+    ('\u{1f774}', '\u{1f77f}'),
+    ('\u{1f7d9}', '\u{1f7df}'),
+    ('\u{1f7ec}', '\u{1f7ff}'),
+    ('\u{1f80c}', '\u{1f80f}'),
+    ('\u{1f848}', '\u{1f84f}'),
+    ('\u{1f85a}', '\u{1f85f}'),
+    ('\u{1f888}', '\u{1f88f}'),
+    ('\u{1f8ae}', '\u{1f8af}'),
+    ('\u{1f8b2}', '\u{1f8ff}'),
+    ('\u{1f979}', '\u{1f979}'),
+    ('\u{1f9cc}', '\u{1f9cc}'),
+    ('\u{1fa54}', '\u{1fa5f}'),
+    ('\u{1fa6e}', '\u{1fa6f}'),
+    ('\u{1fa75}', '\u{1fa77}'),
+    ('\u{1fa7b}', '\u{1fa7f}'),
+    ('\u{1fa87}', '\u{1fa8f}'),
+    ('\u{1faa9}', '\u{1faaf}'),
+    ('\u{1fab7}', '\u{1fabf}'),
+    ('\u{1fac3}', '\u{1facf}'),
+    ('\u{1fad7}', '\u{1faff}'),
+    ('\u{1fb93}', '\u{1fb93}'),
+    ('\u{1fbcb}', '\u{1fbef}'),
+    ('\u{1fbfa}', '\u{1ffff}'),
+    ('\u{2a6de}', '\u{2a6ff}'),
+    ('\u{2b735}', '\u{2b73f}'),
+    ('\u{2b81e}', '\u{2b81f}'),
+    ('\u{2cea2}', '\u{2ceaf}'),
+    ('\u{2ebe1}', '\u{2f7ff}'),
+    ('\u{2fa1e}', '\u{2ffff}'),
+    ('\u{3134b}', '\u{e0000}'),
+    ('\u{e0002}', '\u{e001f}'),
+    ('\u{e0080}', '\u{e00ff}'),
+    ('\u{e01f0}', '\u{effff}'),
+    ('\u{ffffe}', '\u{fffff}'),
+    ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const UPPERCASE_LETTER: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('À', 'Ö'),
+    ('Ø', 'Þ'),
+    ('Ā', 'Ā'),
+    ('Ă', 'Ă'),
+    ('Ą', 'Ą'),
+    ('Ć', 'Ć'),
+    ('Ĉ', 'Ĉ'),
+    ('Ċ', 'Ċ'),
+    ('Č', 'Č'),
+    ('Ď', 'Ď'),
+    ('Đ', 'Đ'),
+    ('Ē', 'Ē'),
+    ('Ĕ', 'Ĕ'),
+    ('Ė', 'Ė'),
+    ('Ę', 'Ę'),
+    ('Ě', 'Ě'),
+    ('Ĝ', 'Ĝ'),
+    ('Ğ', 'Ğ'),
+    ('Ġ', 'Ġ'),
+    ('Ģ', 'Ģ'),
+    ('Ĥ', 'Ĥ'),
+    ('Ħ', 'Ħ'),
+    ('Ĩ', 'Ĩ'),
+    ('Ī', 'Ī'),
+    ('Ĭ', 'Ĭ'),
+    ('Į', 'Į'),
+    ('İ', 'İ'),
+    ('IJ', 'IJ'),
+    ('Ĵ', 'Ĵ'),
+    ('Ķ', 'Ķ'),
+    ('Ĺ', 'Ĺ'),
+    ('Ļ', 'Ļ'),
+    ('Ľ', 'Ľ'),
+    ('Ŀ', 'Ŀ'),
+    ('Ł', 'Ł'),
+    ('Ń', 'Ń'),
+    ('Ņ', 'Ņ'),
+    ('Ň', 'Ň'),
+    ('Ŋ', 'Ŋ'),
+    ('Ō', 'Ō'),
+    ('Ŏ', 'Ŏ'),
+    ('Ő', 'Ő'),
+    ('Œ', 'Œ'),
+    ('Ŕ', 'Ŕ'),
+    ('Ŗ', 'Ŗ'),
+    ('Ř', 'Ř'),
+    ('Ś', 'Ś'),
+    ('Ŝ', 'Ŝ'),
+    ('Ş', 'Ş'),
+    ('Š', 'Š'),
+    ('Ţ', 'Ţ'),
+    ('Ť', 'Ť'),
+    ('Ŧ', 'Ŧ'),
+    ('Ũ', 'Ũ'),
+    ('Ū', 'Ū'),
+    ('Ŭ', 'Ŭ'),
+    ('Ů', 'Ů'),
+    ('Ű', 'Ű'),
+    ('Ų', 'Ų'),
+    ('Ŵ', 'Ŵ'),
+    ('Ŷ', 'Ŷ'),
+    ('Ÿ', 'Ź'),
+    ('Ż', 'Ż'),
+    ('Ž', 'Ž'),
+    ('Ɓ', 'Ƃ'),
+    ('Ƅ', 'Ƅ'),
+    ('Ɔ', 'Ƈ'),
+    ('Ɖ', 'Ƌ'),
+    ('Ǝ', 'Ƒ'),
+    ('Ɠ', 'Ɣ'),
+    ('Ɩ', 'Ƙ'),
+    ('Ɯ', 'Ɲ'),
+    ('Ɵ', 'Ơ'),
+    ('Ƣ', 'Ƣ'),
+    ('Ƥ', 'Ƥ'),
+    ('Ʀ', 'Ƨ'),
+    ('Ʃ', 'Ʃ'),
+    ('Ƭ', 'Ƭ'),
+    ('Ʈ', 'Ư'),
+    ('Ʊ', 'Ƴ'),
+    ('Ƶ', 'Ƶ'),
+    ('Ʒ', 'Ƹ'),
+    ('Ƽ', 'Ƽ'),
+    ('DŽ', 'DŽ'),
+    ('LJ', 'LJ'),
+    ('NJ', 'NJ'),
+    ('Ǎ', 'Ǎ'),
+    ('Ǐ', 'Ǐ'),
+    ('Ǒ', 'Ǒ'),
+    ('Ǔ', 'Ǔ'),
+    ('Ǖ', 'Ǖ'),
+    ('Ǘ', 'Ǘ'),
+    ('Ǚ', 'Ǚ'),
+    ('Ǜ', 'Ǜ'),
+    ('Ǟ', 'Ǟ'),
+    ('Ǡ', 'Ǡ'),
+    ('Ǣ', 'Ǣ'),
+    ('Ǥ', 'Ǥ'),
+    ('Ǧ', 'Ǧ'),
+    ('Ǩ', 'Ǩ'),
+    ('Ǫ', 'Ǫ'),
+    ('Ǭ', 'Ǭ'),
+    ('Ǯ', 'Ǯ'),
+    ('DZ', 'DZ'),
+    ('Ǵ', 'Ǵ'),
+    ('Ƕ', 'Ǹ'),
+    ('Ǻ', 'Ǻ'),
+    ('Ǽ', 'Ǽ'),
+    ('Ǿ', 'Ǿ'),
+    ('Ȁ', 'Ȁ'),
+    ('Ȃ', 'Ȃ'),
+    ('Ȅ', 'Ȅ'),
+    ('Ȇ', 'Ȇ'),
+    ('Ȉ', 'Ȉ'),
+    ('Ȋ', 'Ȋ'),
+    ('Ȍ', 'Ȍ'),
+    ('Ȏ', 'Ȏ'),
+    ('Ȑ', 'Ȑ'),
+    ('Ȓ', 'Ȓ'),
+    ('Ȕ', 'Ȕ'),
+    ('Ȗ', 'Ȗ'),
+    ('Ș', 'Ș'),
+    ('Ț', 'Ț'),
+    ('Ȝ', 'Ȝ'),
+    ('Ȟ', 'Ȟ'),
+    ('Ƞ', 'Ƞ'),
+    ('Ȣ', 'Ȣ'),
+    ('Ȥ', 'Ȥ'),
+    ('Ȧ', 'Ȧ'),
+    ('Ȩ', 'Ȩ'),
+    ('Ȫ', 'Ȫ'),
+    ('Ȭ', 'Ȭ'),
+    ('Ȯ', 'Ȯ'),
+    ('Ȱ', 'Ȱ'),
+    ('Ȳ', 'Ȳ'),
+    ('Ⱥ', 'Ȼ'),
+    ('Ƚ', 'Ⱦ'),
+    ('Ɂ', 'Ɂ'),
+    ('Ƀ', 'Ɇ'),
+    ('Ɉ', 'Ɉ'),
+    ('Ɋ', 'Ɋ'),
+    ('Ɍ', 'Ɍ'),
+    ('Ɏ', 'Ɏ'),
+    ('Ͱ', 'Ͱ'),
+    ('Ͳ', 'Ͳ'),
+    ('Ͷ', 'Ͷ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ώ'),
+    ('Α', 'Ρ'),
+    ('Σ', 'Ϋ'),
+    ('Ϗ', 'Ϗ'),
+    ('ϒ', 'ϔ'),
+    ('Ϙ', 'Ϙ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'Ϣ'),
+    ('Ϥ', 'Ϥ'),
+    ('Ϧ', 'Ϧ'),
+    ('Ϩ', 'Ϩ'),
+    ('Ϫ', 'Ϫ'),
+    ('Ϭ', 'Ϭ'),
+    ('Ϯ', 'Ϯ'),
+    ('ϴ', 'ϴ'),
+    ('Ϸ', 'Ϸ'),
+    ('Ϲ', 'Ϻ'),
+    ('Ͻ', 'Я'),
+    ('Ѡ', 'Ѡ'),
+    ('Ѣ', 'Ѣ'),
+    ('Ѥ', 'Ѥ'),
+    ('Ѧ', 'Ѧ'),
+    ('Ѩ', 'Ѩ'),
+    ('Ѫ', 'Ѫ'),
+    ('Ѭ', 'Ѭ'),
+    ('Ѯ', 'Ѯ'),
+    ('Ѱ', 'Ѱ'),
+    ('Ѳ', 'Ѳ'),
+    ('Ѵ', 'Ѵ'),
+    ('Ѷ', 'Ѷ'),
+    ('Ѹ', 'Ѹ'),
+    ('Ѻ', 'Ѻ'),
+    ('Ѽ', 'Ѽ'),
+    ('Ѿ', 'Ѿ'),
+    ('Ҁ', 'Ҁ'),
+    ('Ҋ', 'Ҋ'),
+    ('Ҍ', 'Ҍ'),
+    ('Ҏ', 'Ҏ'),
+    ('Ґ', 'Ґ'),
+    ('Ғ', 'Ғ'),
+    ('Ҕ', 'Ҕ'),
+    ('Җ', 'Җ'),
+    ('Ҙ', 'Ҙ'),
+    ('Қ', 'Қ'),
+    ('Ҝ', 'Ҝ'),
+    ('Ҟ', 'Ҟ'),
+    ('Ҡ', 'Ҡ'),
+    ('Ң', 'Ң'),
+    ('Ҥ', 'Ҥ'),
+    ('Ҧ', 'Ҧ'),
+    ('Ҩ', 'Ҩ'),
+    ('Ҫ', 'Ҫ'),
+    ('Ҭ', 'Ҭ'),
+    ('Ү', 'Ү'),
+    ('Ұ', 'Ұ'),
+    ('Ҳ', 'Ҳ'),
+    ('Ҵ', 'Ҵ'),
+    ('Ҷ', 'Ҷ'),
+    ('Ҹ', 'Ҹ'),
+    ('Һ', 'Һ'),
+    ('Ҽ', 'Ҽ'),
+    ('Ҿ', 'Ҿ'),
+    ('Ӏ', 'Ӂ'),
+    ('Ӄ', 'Ӄ'),
+    ('Ӆ', 'Ӆ'),
+    ('Ӈ', 'Ӈ'),
+    ('Ӊ', 'Ӊ'),
+    ('Ӌ', 'Ӌ'),
+    ('Ӎ', 'Ӎ'),
+    ('Ӑ', 'Ӑ'),
+    ('Ӓ', 'Ӓ'),
+    ('Ӕ', 'Ӕ'),
+    ('Ӗ', 'Ӗ'),
+    ('Ә', 'Ә'),
+    ('Ӛ', 'Ӛ'),
+    ('Ӝ', 'Ӝ'),
+    ('Ӟ', 'Ӟ'),
+    ('Ӡ', 'Ӡ'),
+    ('Ӣ', 'Ӣ'),
+    ('Ӥ', 'Ӥ'),
+    ('Ӧ', 'Ӧ'),
+    ('Ө', 'Ө'),
+    ('Ӫ', 'Ӫ'),
+    ('Ӭ', 'Ӭ'),
+    ('Ӯ', 'Ӯ'),
+    ('Ӱ', 'Ӱ'),
+    ('Ӳ', 'Ӳ'),
+    ('Ӵ', 'Ӵ'),
+    ('Ӷ', 'Ӷ'),
+    ('Ӹ', 'Ӹ'),
+    ('Ӻ', 'Ӻ'),
+    ('Ӽ', 'Ӽ'),
+    ('Ӿ', 'Ӿ'),
+    ('Ԁ', 'Ԁ'),
+    ('Ԃ', 'Ԃ'),
+    ('Ԅ', 'Ԅ'),
+    ('Ԇ', 'Ԇ'),
+    ('Ԉ', 'Ԉ'),
+    ('Ԋ', 'Ԋ'),
+    ('Ԍ', 'Ԍ'),
+    ('Ԏ', 'Ԏ'),
+    ('Ԑ', 'Ԑ'),
+    ('Ԓ', 'Ԓ'),
+    ('Ԕ', 'Ԕ'),
+    ('Ԗ', 'Ԗ'),
+    ('Ԙ', 'Ԙ'),
+    ('Ԛ', 'Ԛ'),
+    ('Ԝ', 'Ԝ'),
+    ('Ԟ', 'Ԟ'),
+    ('Ԡ', 'Ԡ'),
+    ('Ԣ', 'Ԣ'),
+    ('Ԥ', 'Ԥ'),
+    ('Ԧ', 'Ԧ'),
+    ('Ԩ', 'Ԩ'),
+    ('Ԫ', 'Ԫ'),
+    ('Ԭ', 'Ԭ'),
+    ('Ԯ', 'Ԯ'),
+    ('Ա', 'Ֆ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('Ḁ', 'Ḁ'),
+    ('Ḃ', 'Ḃ'),
+    ('Ḅ', 'Ḅ'),
+    ('Ḇ', 'Ḇ'),
+    ('Ḉ', 'Ḉ'),
+    ('Ḋ', 'Ḋ'),
+    ('Ḍ', 'Ḍ'),
+    ('Ḏ', 'Ḏ'),
+    ('Ḑ', 'Ḑ'),
+    ('Ḓ', 'Ḓ'),
+    ('Ḕ', 'Ḕ'),
+    ('Ḗ', 'Ḗ'),
+    ('Ḙ', 'Ḙ'),
+    ('Ḛ', 'Ḛ'),
+    ('Ḝ', 'Ḝ'),
+    ('Ḟ', 'Ḟ'),
+    ('Ḡ', 'Ḡ'),
+    ('Ḣ', 'Ḣ'),
+    ('Ḥ', 'Ḥ'),
+    ('Ḧ', 'Ḧ'),
+    ('Ḩ', 'Ḩ'),
+    ('Ḫ', 'Ḫ'),
+    ('Ḭ', 'Ḭ'),
+    ('Ḯ', 'Ḯ'),
+    ('Ḱ', 'Ḱ'),
+    ('Ḳ', 'Ḳ'),
+    ('Ḵ', 'Ḵ'),
+    ('Ḷ', 'Ḷ'),
+    ('Ḹ', 'Ḹ'),
+    ('Ḻ', 'Ḻ'),
+    ('Ḽ', 'Ḽ'),
+    ('Ḿ', 'Ḿ'),
+    ('Ṁ', 'Ṁ'),
+    ('Ṃ', 'Ṃ'),
+    ('Ṅ', 'Ṅ'),
+    ('Ṇ', 'Ṇ'),
+    ('Ṉ', 'Ṉ'),
+    ('Ṋ', 'Ṋ'),
+    ('Ṍ', 'Ṍ'),
+    ('Ṏ', 'Ṏ'),
+    ('Ṑ', 'Ṑ'),
+    ('Ṓ', 'Ṓ'),
+    ('Ṕ', 'Ṕ'),
+    ('Ṗ', 'Ṗ'),
+    ('Ṙ', 'Ṙ'),
+    ('Ṛ', 'Ṛ'),
+    ('Ṝ', 'Ṝ'),
+    ('Ṟ', 'Ṟ'),
+    ('Ṡ', 'Ṡ'),
+    ('Ṣ', 'Ṣ'),
+    ('Ṥ', 'Ṥ'),
+    ('Ṧ', 'Ṧ'),
+    ('Ṩ', 'Ṩ'),
+    ('Ṫ', 'Ṫ'),
+    ('Ṭ', 'Ṭ'),
+    ('Ṯ', 'Ṯ'),
+    ('Ṱ', 'Ṱ'),
+    ('Ṳ', 'Ṳ'),
+    ('Ṵ', 'Ṵ'),
+    ('Ṷ', 'Ṷ'),
+    ('Ṹ', 'Ṹ'),
+    ('Ṻ', 'Ṻ'),
+    ('Ṽ', 'Ṽ'),
+    ('Ṿ', 'Ṿ'),
+    ('Ẁ', 'Ẁ'),
+    ('Ẃ', 'Ẃ'),
+    ('Ẅ', 'Ẅ'),
+    ('Ẇ', 'Ẇ'),
+    ('Ẉ', 'Ẉ'),
+    ('Ẋ', 'Ẋ'),
+    ('Ẍ', 'Ẍ'),
+    ('Ẏ', 'Ẏ'),
+    ('Ẑ', 'Ẑ'),
+    ('Ẓ', 'Ẓ'),
+    ('Ẕ', 'Ẕ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'Ạ'),
+    ('Ả', 'Ả'),
+    ('Ấ', 'Ấ'),
+    ('Ầ', 'Ầ'),
+    ('Ẩ', 'Ẩ'),
+    ('Ẫ', 'Ẫ'),
+    ('Ậ', 'Ậ'),
+    ('Ắ', 'Ắ'),
+    ('Ằ', 'Ằ'),
+    ('Ẳ', 'Ẳ'),
+    ('Ẵ', 'Ẵ'),
+    ('Ặ', 'Ặ'),
+    ('Ẹ', 'Ẹ'),
+    ('Ẻ', 'Ẻ'),
+    ('Ẽ', 'Ẽ'),
+    ('Ế', 'Ế'),
+    ('Ề', 'Ề'),
+    ('Ể', 'Ể'),
+    ('Ễ', 'Ễ'),
+    ('Ệ', 'Ệ'),
+    ('Ỉ', 'Ỉ'),
+    ('Ị', 'Ị'),
+    ('Ọ', 'Ọ'),
+    ('Ỏ', 'Ỏ'),
+    ('Ố', 'Ố'),
+    ('Ồ', 'Ồ'),
+    ('Ổ', 'Ổ'),
+    ('Ỗ', 'Ỗ'),
+    ('Ộ', 'Ộ'),
+    ('Ớ', 'Ớ'),
+    ('Ờ', 'Ờ'),
+    ('Ở', 'Ở'),
+    ('Ỡ', 'Ỡ'),
+    ('Ợ', 'Ợ'),
+    ('Ụ', 'Ụ'),
+    ('Ủ', 'Ủ'),
+    ('Ứ', 'Ứ'),
+    ('Ừ', 'Ừ'),
+    ('Ử', 'Ử'),
+    ('Ữ', 'Ữ'),
+    ('Ự', 'Ự'),
+    ('Ỳ', 'Ỳ'),
+    ('Ỵ', 'Ỵ'),
+    ('Ỷ', 'Ỷ'),
+    ('Ỹ', 'Ỹ'),
+    ('Ỻ', 'Ỻ'),
+    ('Ỽ', 'Ỽ'),
+    ('Ỿ', 'Ỿ'),
+    ('Ἀ', 'Ἇ'),
+    ('Ἐ', 'Ἕ'),
+    ('Ἠ', 'Ἧ'),
+    ('Ἰ', 'Ἷ'),
+    ('Ὀ', 'Ὅ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'Ὗ'),
+    ('Ὠ', 'Ὧ'),
+    ('Ᾰ', 'Ά'),
+    ('Ὲ', 'Ή'),
+    ('Ῐ', 'Ί'),
+    ('Ῠ', 'Ῥ'),
+    ('Ὸ', 'Ώ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℋ', 'ℍ'),
+    ('ℐ', 'ℒ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℰ', 'ℳ'),
+    ('ℾ', 'ℿ'),
+    ('ⅅ', 'ⅅ'),
+    ('Ↄ', 'Ↄ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('Ⱡ', 'Ⱡ'),
+    ('Ɫ', 'Ɽ'),
+    ('Ⱨ', 'Ⱨ'),
+    ('Ⱪ', 'Ⱪ'),
+    ('Ⱬ', 'Ⱬ'),
+    ('Ɑ', 'Ɒ'),
+    ('Ⱳ', 'Ⱳ'),
+    ('Ⱶ', 'Ⱶ'),
+    ('Ȿ', 'Ⲁ'),
+    ('Ⲃ', 'Ⲃ'),
+    ('Ⲅ', 'Ⲅ'),
+    ('Ⲇ', 'Ⲇ'),
+    ('Ⲉ', 'Ⲉ'),
+    ('Ⲋ', 'Ⲋ'),
+    ('Ⲍ', 'Ⲍ'),
+    ('Ⲏ', 'Ⲏ'),
+    ('Ⲑ', 'Ⲑ'),
+    ('Ⲓ', 'Ⲓ'),
+    ('Ⲕ', 'Ⲕ'),
+    ('Ⲗ', 'Ⲗ'),
+    ('Ⲙ', 'Ⲙ'),
+    ('Ⲛ', 'Ⲛ'),
+    ('Ⲝ', 'Ⲝ'),
+    ('Ⲟ', 'Ⲟ'),
+    ('Ⲡ', 'Ⲡ'),
+    ('Ⲣ', 'Ⲣ'),
+    ('Ⲥ', 'Ⲥ'),
+    ('Ⲧ', 'Ⲧ'),
+    ('Ⲩ', 'Ⲩ'),
+    ('Ⲫ', 'Ⲫ'),
+    ('Ⲭ', 'Ⲭ'),
+    ('Ⲯ', 'Ⲯ'),
+    ('Ⲱ', 'Ⲱ'),
+    ('Ⲳ', 'Ⲳ'),
+    ('Ⲵ', 'Ⲵ'),
+    ('Ⲷ', 'Ⲷ'),
+    ('Ⲹ', 'Ⲹ'),
+    ('Ⲻ', 'Ⲻ'),
+    ('Ⲽ', 'Ⲽ'),
+    ('Ⲿ', 'Ⲿ'),
+    ('Ⳁ', 'Ⳁ'),
+    ('Ⳃ', 'Ⳃ'),
+    ('Ⳅ', 'Ⳅ'),
+    ('Ⳇ', 'Ⳇ'),
+    ('Ⳉ', 'Ⳉ'),
+    ('Ⳋ', 'Ⳋ'),
+    ('Ⳍ', 'Ⳍ'),
+    ('Ⳏ', 'Ⳏ'),
+    ('Ⳑ', 'Ⳑ'),
+    ('Ⳓ', 'Ⳓ'),
+    ('Ⳕ', 'Ⳕ'),
+    ('Ⳗ', 'Ⳗ'),
+    ('Ⳙ', 'Ⳙ'),
+    ('Ⳛ', 'Ⳛ'),
+    ('Ⳝ', 'Ⳝ'),
+    ('Ⳟ', 'Ⳟ'),
+    ('Ⳡ', 'Ⳡ'),
+    ('Ⳣ', 'Ⳣ'),
+    ('Ⳬ', 'Ⳬ'),
+    ('Ⳮ', 'Ⳮ'),
+    ('Ⳳ', 'Ⳳ'),
+    ('Ꙁ', 'Ꙁ'),
+    ('Ꙃ', 'Ꙃ'),
+    ('Ꙅ', 'Ꙅ'),
+    ('Ꙇ', 'Ꙇ'),
+    ('Ꙉ', 'Ꙉ'),
+    ('Ꙋ', 'Ꙋ'),
+    ('Ꙍ', 'Ꙍ'),
+    ('Ꙏ', 'Ꙏ'),
+    ('Ꙑ', 'Ꙑ'),
+    ('Ꙓ', 'Ꙓ'),
+    ('Ꙕ', 'Ꙕ'),
+    ('Ꙗ', 'Ꙗ'),
+    ('Ꙙ', 'Ꙙ'),
+    ('Ꙛ', 'Ꙛ'),
+    ('Ꙝ', 'Ꙝ'),
+    ('Ꙟ', 'Ꙟ'),
+    ('Ꙡ', 'Ꙡ'),
+    ('Ꙣ', 'Ꙣ'),
+    ('Ꙥ', 'Ꙥ'),
+    ('Ꙧ', 'Ꙧ'),
+    ('Ꙩ', 'Ꙩ'),
+    ('Ꙫ', 'Ꙫ'),
+    ('Ꙭ', 'Ꙭ'),
+    ('Ꚁ', 'Ꚁ'),
+    ('Ꚃ', 'Ꚃ'),
+    ('Ꚅ', 'Ꚅ'),
+    ('Ꚇ', 'Ꚇ'),
+    ('Ꚉ', 'Ꚉ'),
+    ('Ꚋ', 'Ꚋ'),
+    ('Ꚍ', 'Ꚍ'),
+    ('Ꚏ', 'Ꚏ'),
+    ('Ꚑ', 'Ꚑ'),
+    ('Ꚓ', 'Ꚓ'),
+    ('Ꚕ', 'Ꚕ'),
+    ('Ꚗ', 'Ꚗ'),
+    ('Ꚙ', 'Ꚙ'),
+    ('Ꚛ', 'Ꚛ'),
+    ('Ꜣ', 'Ꜣ'),
+    ('Ꜥ', 'Ꜥ'),
+    ('Ꜧ', 'Ꜧ'),
+    ('Ꜩ', 'Ꜩ'),
+    ('Ꜫ', 'Ꜫ'),
+    ('Ꜭ', 'Ꜭ'),
+    ('Ꜯ', 'Ꜯ'),
+    ('Ꜳ', 'Ꜳ'),
+    ('Ꜵ', 'Ꜵ'),
+    ('Ꜷ', 'Ꜷ'),
+    ('Ꜹ', 'Ꜹ'),
+    ('Ꜻ', 'Ꜻ'),
+    ('Ꜽ', 'Ꜽ'),
+    ('Ꜿ', 'Ꜿ'),
+    ('Ꝁ', 'Ꝁ'),
+    ('Ꝃ', 'Ꝃ'),
+    ('Ꝅ', 'Ꝅ'),
+    ('Ꝇ', 'Ꝇ'),
+    ('Ꝉ', 'Ꝉ'),
+    ('Ꝋ', 'Ꝋ'),
+    ('Ꝍ', 'Ꝍ'),
+    ('Ꝏ', 'Ꝏ'),
+    ('Ꝑ', 'Ꝑ'),
+    ('Ꝓ', 'Ꝓ'),
+    ('Ꝕ', 'Ꝕ'),
+    ('Ꝗ', 'Ꝗ'),
+    ('Ꝙ', 'Ꝙ'),
+    ('Ꝛ', 'Ꝛ'),
+    ('Ꝝ', 'Ꝝ'),
+    ('Ꝟ', 'Ꝟ'),
+    ('Ꝡ', 'Ꝡ'),
+    ('Ꝣ', 'Ꝣ'),
+    ('Ꝥ', 'Ꝥ'),
+    ('Ꝧ', 'Ꝧ'),
+    ('Ꝩ', 'Ꝩ'),
+    ('Ꝫ', 'Ꝫ'),
+    ('Ꝭ', 'Ꝭ'),
+    ('Ꝯ', 'Ꝯ'),
+    ('Ꝺ', 'Ꝺ'),
+    ('Ꝼ', 'Ꝼ'),
+    ('Ᵹ', 'Ꝿ'),
+    ('Ꞁ', 'Ꞁ'),
+    ('Ꞃ', 'Ꞃ'),
+    ('Ꞅ', 'Ꞅ'),
+    ('Ꞇ', 'Ꞇ'),
+    ('Ꞌ', 'Ꞌ'),
+    ('Ɥ', 'Ɥ'),
+    ('Ꞑ', 'Ꞑ'),
+    ('Ꞓ', 'Ꞓ'),
+    ('Ꞗ', 'Ꞗ'),
+    ('Ꞙ', 'Ꞙ'),
+    ('Ꞛ', 'Ꞛ'),
+    ('Ꞝ', 'Ꞝ'),
+    ('Ꞟ', 'Ꞟ'),
+    ('Ꞡ', 'Ꞡ'),
+    ('Ꞣ', 'Ꞣ'),
+    ('Ꞥ', 'Ꞥ'),
+    ('Ꞧ', 'Ꞧ'),
+    ('Ꞩ', 'Ꞩ'),
+    ('Ɦ', 'Ɪ'),
+    ('Ʞ', 'Ꞵ'),
+    ('Ꞷ', 'Ꞷ'),
+    ('Ꞹ', 'Ꞹ'),
+    ('Ꞻ', 'Ꞻ'),
+    ('Ꞽ', 'Ꞽ'),
+    ('Ꞿ', 'Ꞿ'),
+    ('Ꟃ', 'Ꟃ'),
+    ('Ꞔ', '\u{a7c7}'),
+    ('\u{a7c9}', '\u{a7c9}'),
+    ('\u{a7f5}', '\u{a7f5}'),
+    ('A', 'Z'),
+    ('𐐀', '𐐧'),
+    ('𐒰', '𐓓'),
+    ('𐲀', '𐲲'),
+    ('𑢠', '𑢿'),
+    ('𖹀', '𖹟'),
+    ('𝐀', '𝐙'),
+    ('𝐴', '𝑍'),
+    ('𝑨', '𝒁'),
+    ('𝒜', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒵'),
+    ('𝓐', '𝓩'),
+    ('𝔄', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔸', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕬', '𝖅'),
+    ('𝖠', '𝖹'),
+    ('𝗔', '𝗭'),
+    ('𝘈', '𝘡'),
+    ('𝘼', '𝙕'),
+    ('𝙰', '𝚉'),
+    ('𝚨', '𝛀'),
+    ('𝛢', '𝛺'),
+    ('𝜜', '𝜴'),
+    ('𝝖', '𝝮'),
+    ('𝞐', '𝞨'),
+    ('𝟊', '𝟊'),
+    ('𞤀', '𞤡'),
+];
diff --git a/src/unicode_tables/grapheme_cluster_break.rs b/src/unicode_tables/grapheme_cluster_break.rs
new file mode 100644
index 0000000..1d0b285
--- /dev/null
+++ b/src/unicode_tables/grapheme_cluster_break.rs
@@ -0,0 +1,1387 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate grapheme-cluster-break ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("CR", CR),
+    ("Control", CONTROL),
+    ("Extend", EXTEND),
+    ("L", L),
+    ("LF", LF),
+    ("LV", LV),
+    ("LVT", LVT),
+    ("Prepend", PREPEND),
+    ("Regional_Indicator", REGIONAL_INDICATOR),
+    ("SpacingMark", SPACINGMARK),
+    ("T", T),
+    ("V", V),
+    ("ZWJ", ZWJ),
+];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const CONTROL: &'static [(char, char)] = &[
+    ('\u{0}', '\t'),
+    ('\u{b}', '\u{c}'),
+    ('\u{e}', '\u{1f}'),
+    ('\u{7f}', '\u{9f}'),
+    ('\u{ad}', '\u{ad}'),
+    ('\u{61c}', '\u{61c}'),
+    ('\u{180e}', '\u{180e}'),
+    ('\u{200b}', '\u{200b}'),
+    ('\u{200e}', '\u{200f}'),
+    ('\u{2028}', '\u{202e}'),
+    ('\u{2060}', '\u{206f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{fff0}', '\u{fffb}'),
+    ('\u{13430}', '\u{13438}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0000}', '\u{e001f}'),
+    ('\u{e0080}', '\u{e00ff}'),
+    ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const EXTEND: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{489}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{902}'),
+    ('\u{93a}', '\u{93a}'),
+    ('\u{93c}', '\u{93c}'),
+    ('\u{941}', '\u{948}'),
+    ('\u{94d}', '\u{94d}'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', '\u{981}'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9be}'),
+    ('\u{9c1}', '\u{9c4}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', '\u{a02}'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('\u{a41}', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', '\u{a82}'),
+    ('\u{abc}', '\u{abc}'),
+    ('\u{ac1}', '\u{ac5}'),
+    ('\u{ac7}', '\u{ac8}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', '\u{b01}'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3e}', '\u{b3f}'),
+    ('\u{b41}', '\u{b44}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', '\u{bbe}'),
+    ('\u{bc0}', '\u{bc0}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', '\u{c00}'),
+    ('\u{c04}', '\u{c04}'),
+    ('\u{c3e}', '\u{c40}'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', '\u{c81}'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('\u{cbf}', '\u{cbf}'),
+    ('\u{cc2}', '\u{cc2}'),
+    ('\u{cc6}', '\u{cc6}'),
+    ('\u{ccc}', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', '\u{d01}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d3e}', '\u{d3e}'),
+    ('\u{d41}', '\u{d44}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', '\u{d81}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dcf}'),
+    ('\u{dd2}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('\u{ddf}', '\u{ddf}'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('\u{f71}', '\u{f7e}'),
+    ('\u{f80}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('\u{102d}', '\u{1030}'),
+    ('\u{1032}', '\u{1037}'),
+    ('\u{1039}', '\u{103a}'),
+    ('\u{103d}', '\u{103e}'),
+    ('\u{1058}', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{1082}'),
+    ('\u{1085}', '\u{1086}'),
+    ('\u{108d}', '\u{108d}'),
+    ('\u{109d}', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{17b7}', '\u{17bd}'),
+    ('\u{17c6}', '\u{17c6}'),
+    ('\u{17c9}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', '\u{1922}'),
+    ('\u{1927}', '\u{1928}'),
+    ('\u{1932}', '\u{1932}'),
+    ('\u{1939}', '\u{193b}'),
+    ('\u{1a17}', '\u{1a18}'),
+    ('\u{1a1b}', '\u{1a1b}'),
+    ('\u{1a56}', '\u{1a56}'),
+    ('\u{1a58}', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a60}'),
+    ('\u{1a62}', '\u{1a62}'),
+    ('\u{1a65}', '\u{1a6c}'),
+    ('\u{1a73}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', '\u{1b03}'),
+    ('\u{1b34}', '\u{1b3a}'),
+    ('\u{1b3c}', '\u{1b3c}'),
+    ('\u{1b42}', '\u{1b42}'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '\u{1b81}'),
+    ('\u{1ba2}', '\u{1ba5}'),
+    ('\u{1ba8}', '\u{1ba9}'),
+    ('\u{1bab}', '\u{1bad}'),
+    ('\u{1be6}', '\u{1be6}'),
+    ('\u{1be8}', '\u{1be9}'),
+    ('\u{1bed}', '\u{1bed}'),
+    ('\u{1bef}', '\u{1bf1}'),
+    ('\u{1c2c}', '\u{1c33}'),
+    ('\u{1c36}', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce0}'),
+    ('\u{1ce2}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200c}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('\u{a825}', '\u{a826}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{a8c4}', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '\u{a951}'),
+    ('\u{a980}', '\u{a982}'),
+    ('\u{a9b3}', '\u{a9b3}'),
+    ('\u{a9b6}', '\u{a9b9}'),
+    ('\u{a9bc}', '\u{a9bd}'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa2e}'),
+    ('\u{aa31}', '\u{aa32}'),
+    ('\u{aa35}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', '\u{aa4c}'),
+    ('\u{aa7c}', '\u{aa7c}'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('\u{aaec}', '\u{aaed}'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('\u{abe5}', '\u{abe5}'),
+    ('\u{abe8}', '\u{abe8}'),
+    ('\u{abed}', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('\u{11001}', '\u{11001}'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '\u{11081}'),
+    ('\u{110b3}', '\u{110b6}'),
+    ('\u{110b9}', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{1112b}'),
+    ('\u{1112d}', '\u{11134}'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '\u{11181}'),
+    ('\u{111b6}', '\u{111be}'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111cf}', '\u{111cf}'),
+    ('\u{1122f}', '\u{11231}'),
+    ('\u{11234}', '\u{11234}'),
+    ('\u{11236}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112df}'),
+    ('\u{112e3}', '\u{112ea}'),
+    ('\u{11300}', '\u{11301}'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{1133e}', '\u{1133e}'),
+    ('\u{11340}', '\u{11340}'),
+    ('\u{11357}', '\u{11357}'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('\u{11438}', '\u{1143f}'),
+    ('\u{11442}', '\u{11444}'),
+    ('\u{11446}', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b0}', '\u{114b0}'),
+    ('\u{114b3}', '\u{114b8}'),
+    ('\u{114ba}', '\u{114ba}'),
+    ('\u{114bd}', '\u{114bd}'),
+    ('\u{114bf}', '\u{114c0}'),
+    ('\u{114c2}', '\u{114c3}'),
+    ('\u{115af}', '\u{115af}'),
+    ('\u{115b2}', '\u{115b5}'),
+    ('\u{115bc}', '\u{115bd}'),
+    ('\u{115bf}', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('\u{11633}', '\u{1163a}'),
+    ('\u{1163d}', '\u{1163d}'),
+    ('\u{1163f}', '\u{11640}'),
+    ('\u{116ab}', '\u{116ab}'),
+    ('\u{116ad}', '\u{116ad}'),
+    ('\u{116b0}', '\u{116b5}'),
+    ('\u{116b7}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1171f}'),
+    ('\u{11722}', '\u{11725}'),
+    ('\u{11727}', '\u{1172b}'),
+    ('\u{1182f}', '\u{11837}'),
+    ('\u{11839}', '\u{1183a}'),
+    ('\u{11930}', '\u{11930}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{1193e}', '\u{1193e}'),
+    ('\u{11943}', '\u{11943}'),
+    ('\u{119d4}', '\u{119d7}'),
+    ('\u{119da}', '\u{119db}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '\u{11a38}'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a56}'),
+    ('\u{11a59}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a96}'),
+    ('\u{11a98}', '\u{11a99}'),
+    ('\u{11c30}', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3d}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('\u{11caa}', '\u{11cb0}'),
+    ('\u{11cb2}', '\u{11cb3}'),
+    ('\u{11cb5}', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('\u{11d95}', '\u{11d95}'),
+    ('\u{11d97}', '\u{11d97}'),
+    ('\u{11ef3}', '\u{11ef4}'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d165}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d16e}', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('🏻', '🏿'),
+    ('\u{e0020}', '\u{e007f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const L: &'static [(char, char)] = &[('ᄀ', 'ᅟ'), ('ꥠ', 'ꥼ')];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const LV: &'static [(char, char)] = &[
+    ('가', '가'),
+    ('개', '개'),
+    ('갸', '갸'),
+    ('걔', '걔'),
+    ('거', '거'),
+    ('게', '게'),
+    ('겨', '겨'),
+    ('계', '계'),
+    ('고', '고'),
+    ('과', '과'),
+    ('괘', '괘'),
+    ('괴', '괴'),
+    ('교', '교'),
+    ('구', '구'),
+    ('궈', '궈'),
+    ('궤', '궤'),
+    ('귀', '귀'),
+    ('규', '규'),
+    ('그', '그'),
+    ('긔', '긔'),
+    ('기', '기'),
+    ('까', '까'),
+    ('깨', '깨'),
+    ('꺄', '꺄'),
+    ('꺠', '꺠'),
+    ('꺼', '꺼'),
+    ('께', '께'),
+    ('껴', '껴'),
+    ('꼐', '꼐'),
+    ('꼬', '꼬'),
+    ('꽈', '꽈'),
+    ('꽤', '꽤'),
+    ('꾀', '꾀'),
+    ('꾜', '꾜'),
+    ('꾸', '꾸'),
+    ('꿔', '꿔'),
+    ('꿰', '꿰'),
+    ('뀌', '뀌'),
+    ('뀨', '뀨'),
+    ('끄', '끄'),
+    ('끠', '끠'),
+    ('끼', '끼'),
+    ('나', '나'),
+    ('내', '내'),
+    ('냐', '냐'),
+    ('냬', '냬'),
+    ('너', '너'),
+    ('네', '네'),
+    ('녀', '녀'),
+    ('녜', '녜'),
+    ('노', '노'),
+    ('놔', '놔'),
+    ('놰', '놰'),
+    ('뇌', '뇌'),
+    ('뇨', '뇨'),
+    ('누', '누'),
+    ('눠', '눠'),
+    ('눼', '눼'),
+    ('뉘', '뉘'),
+    ('뉴', '뉴'),
+    ('느', '느'),
+    ('늬', '늬'),
+    ('니', '니'),
+    ('다', '다'),
+    ('대', '대'),
+    ('댜', '댜'),
+    ('댸', '댸'),
+    ('더', '더'),
+    ('데', '데'),
+    ('뎌', '뎌'),
+    ('뎨', '뎨'),
+    ('도', '도'),
+    ('돠', '돠'),
+    ('돼', '돼'),
+    ('되', '되'),
+    ('됴', '됴'),
+    ('두', '두'),
+    ('둬', '둬'),
+    ('뒈', '뒈'),
+    ('뒤', '뒤'),
+    ('듀', '듀'),
+    ('드', '드'),
+    ('듸', '듸'),
+    ('디', '디'),
+    ('따', '따'),
+    ('때', '때'),
+    ('땨', '땨'),
+    ('떄', '떄'),
+    ('떠', '떠'),
+    ('떼', '떼'),
+    ('뗘', '뗘'),
+    ('뗴', '뗴'),
+    ('또', '또'),
+    ('똬', '똬'),
+    ('뙈', '뙈'),
+    ('뙤', '뙤'),
+    ('뚀', '뚀'),
+    ('뚜', '뚜'),
+    ('뚸', '뚸'),
+    ('뛔', '뛔'),
+    ('뛰', '뛰'),
+    ('뜌', '뜌'),
+    ('뜨', '뜨'),
+    ('띄', '띄'),
+    ('띠', '띠'),
+    ('라', '라'),
+    ('래', '래'),
+    ('랴', '랴'),
+    ('럐', '럐'),
+    ('러', '러'),
+    ('레', '레'),
+    ('려', '려'),
+    ('례', '례'),
+    ('로', '로'),
+    ('롸', '롸'),
+    ('뢔', '뢔'),
+    ('뢰', '뢰'),
+    ('료', '료'),
+    ('루', '루'),
+    ('뤄', '뤄'),
+    ('뤠', '뤠'),
+    ('뤼', '뤼'),
+    ('류', '류'),
+    ('르', '르'),
+    ('릐', '릐'),
+    ('리', '리'),
+    ('마', '마'),
+    ('매', '매'),
+    ('먀', '먀'),
+    ('먜', '먜'),
+    ('머', '머'),
+    ('메', '메'),
+    ('며', '며'),
+    ('몌', '몌'),
+    ('모', '모'),
+    ('뫄', '뫄'),
+    ('뫠', '뫠'),
+    ('뫼', '뫼'),
+    ('묘', '묘'),
+    ('무', '무'),
+    ('뭐', '뭐'),
+    ('뭬', '뭬'),
+    ('뮈', '뮈'),
+    ('뮤', '뮤'),
+    ('므', '므'),
+    ('믜', '믜'),
+    ('미', '미'),
+    ('바', '바'),
+    ('배', '배'),
+    ('뱌', '뱌'),
+    ('뱨', '뱨'),
+    ('버', '버'),
+    ('베', '베'),
+    ('벼', '벼'),
+    ('볘', '볘'),
+    ('보', '보'),
+    ('봐', '봐'),
+    ('봬', '봬'),
+    ('뵈', '뵈'),
+    ('뵤', '뵤'),
+    ('부', '부'),
+    ('붜', '붜'),
+    ('붸', '붸'),
+    ('뷔', '뷔'),
+    ('뷰', '뷰'),
+    ('브', '브'),
+    ('븨', '븨'),
+    ('비', '비'),
+    ('빠', '빠'),
+    ('빼', '빼'),
+    ('뺘', '뺘'),
+    ('뺴', '뺴'),
+    ('뻐', '뻐'),
+    ('뻬', '뻬'),
+    ('뼈', '뼈'),
+    ('뼤', '뼤'),
+    ('뽀', '뽀'),
+    ('뽜', '뽜'),
+    ('뽸', '뽸'),
+    ('뾔', '뾔'),
+    ('뾰', '뾰'),
+    ('뿌', '뿌'),
+    ('뿨', '뿨'),
+    ('쀄', '쀄'),
+    ('쀠', '쀠'),
+    ('쀼', '쀼'),
+    ('쁘', '쁘'),
+    ('쁴', '쁴'),
+    ('삐', '삐'),
+    ('사', '사'),
+    ('새', '새'),
+    ('샤', '샤'),
+    ('섀', '섀'),
+    ('서', '서'),
+    ('세', '세'),
+    ('셔', '셔'),
+    ('셰', '셰'),
+    ('소', '소'),
+    ('솨', '솨'),
+    ('쇄', '쇄'),
+    ('쇠', '쇠'),
+    ('쇼', '쇼'),
+    ('수', '수'),
+    ('숴', '숴'),
+    ('쉐', '쉐'),
+    ('쉬', '쉬'),
+    ('슈', '슈'),
+    ('스', '스'),
+    ('싀', '싀'),
+    ('시', '시'),
+    ('싸', '싸'),
+    ('쌔', '쌔'),
+    ('쌰', '쌰'),
+    ('썌', '썌'),
+    ('써', '써'),
+    ('쎄', '쎄'),
+    ('쎠', '쎠'),
+    ('쎼', '쎼'),
+    ('쏘', '쏘'),
+    ('쏴', '쏴'),
+    ('쐐', '쐐'),
+    ('쐬', '쐬'),
+    ('쑈', '쑈'),
+    ('쑤', '쑤'),
+    ('쒀', '쒀'),
+    ('쒜', '쒜'),
+    ('쒸', '쒸'),
+    ('쓔', '쓔'),
+    ('쓰', '쓰'),
+    ('씌', '씌'),
+    ('씨', '씨'),
+    ('아', '아'),
+    ('애', '애'),
+    ('야', '야'),
+    ('얘', '얘'),
+    ('어', '어'),
+    ('에', '에'),
+    ('여', '여'),
+    ('예', '예'),
+    ('오', '오'),
+    ('와', '와'),
+    ('왜', '왜'),
+    ('외', '외'),
+    ('요', '요'),
+    ('우', '우'),
+    ('워', '워'),
+    ('웨', '웨'),
+    ('위', '위'),
+    ('유', '유'),
+    ('으', '으'),
+    ('의', '의'),
+    ('이', '이'),
+    ('자', '자'),
+    ('재', '재'),
+    ('쟈', '쟈'),
+    ('쟤', '쟤'),
+    ('저', '저'),
+    ('제', '제'),
+    ('져', '져'),
+    ('졔', '졔'),
+    ('조', '조'),
+    ('좌', '좌'),
+    ('좨', '좨'),
+    ('죄', '죄'),
+    ('죠', '죠'),
+    ('주', '주'),
+    ('줘', '줘'),
+    ('줴', '줴'),
+    ('쥐', '쥐'),
+    ('쥬', '쥬'),
+    ('즈', '즈'),
+    ('즤', '즤'),
+    ('지', '지'),
+    ('짜', '짜'),
+    ('째', '째'),
+    ('쨔', '쨔'),
+    ('쨰', '쨰'),
+    ('쩌', '쩌'),
+    ('쩨', '쩨'),
+    ('쪄', '쪄'),
+    ('쪠', '쪠'),
+    ('쪼', '쪼'),
+    ('쫘', '쫘'),
+    ('쫴', '쫴'),
+    ('쬐', '쬐'),
+    ('쬬', '쬬'),
+    ('쭈', '쭈'),
+    ('쭤', '쭤'),
+    ('쮀', '쮀'),
+    ('쮜', '쮜'),
+    ('쮸', '쮸'),
+    ('쯔', '쯔'),
+    ('쯰', '쯰'),
+    ('찌', '찌'),
+    ('차', '차'),
+    ('채', '채'),
+    ('챠', '챠'),
+    ('챼', '챼'),
+    ('처', '처'),
+    ('체', '체'),
+    ('쳐', '쳐'),
+    ('쳬', '쳬'),
+    ('초', '초'),
+    ('촤', '촤'),
+    ('쵀', '쵀'),
+    ('최', '최'),
+    ('쵸', '쵸'),
+    ('추', '추'),
+    ('춰', '춰'),
+    ('췌', '췌'),
+    ('취', '취'),
+    ('츄', '츄'),
+    ('츠', '츠'),
+    ('츼', '츼'),
+    ('치', '치'),
+    ('카', '카'),
+    ('캐', '캐'),
+    ('캬', '캬'),
+    ('컈', '컈'),
+    ('커', '커'),
+    ('케', '케'),
+    ('켜', '켜'),
+    ('켸', '켸'),
+    ('코', '코'),
+    ('콰', '콰'),
+    ('쾌', '쾌'),
+    ('쾨', '쾨'),
+    ('쿄', '쿄'),
+    ('쿠', '쿠'),
+    ('쿼', '쿼'),
+    ('퀘', '퀘'),
+    ('퀴', '퀴'),
+    ('큐', '큐'),
+    ('크', '크'),
+    ('킈', '킈'),
+    ('키', '키'),
+    ('타', '타'),
+    ('태', '태'),
+    ('탸', '탸'),
+    ('턔', '턔'),
+    ('터', '터'),
+    ('테', '테'),
+    ('텨', '텨'),
+    ('톄', '톄'),
+    ('토', '토'),
+    ('톼', '톼'),
+    ('퇘', '퇘'),
+    ('퇴', '퇴'),
+    ('툐', '툐'),
+    ('투', '투'),
+    ('퉈', '퉈'),
+    ('퉤', '퉤'),
+    ('튀', '튀'),
+    ('튜', '튜'),
+    ('트', '트'),
+    ('틔', '틔'),
+    ('티', '티'),
+    ('파', '파'),
+    ('패', '패'),
+    ('퍄', '퍄'),
+    ('퍠', '퍠'),
+    ('퍼', '퍼'),
+    ('페', '페'),
+    ('펴', '펴'),
+    ('폐', '폐'),
+    ('포', '포'),
+    ('퐈', '퐈'),
+    ('퐤', '퐤'),
+    ('푀', '푀'),
+    ('표', '표'),
+    ('푸', '푸'),
+    ('풔', '풔'),
+    ('풰', '풰'),
+    ('퓌', '퓌'),
+    ('퓨', '퓨'),
+    ('프', '프'),
+    ('픠', '픠'),
+    ('피', '피'),
+    ('하', '하'),
+    ('해', '해'),
+    ('햐', '햐'),
+    ('햬', '햬'),
+    ('허', '허'),
+    ('헤', '헤'),
+    ('혀', '혀'),
+    ('혜', '혜'),
+    ('호', '호'),
+    ('화', '화'),
+    ('홰', '홰'),
+    ('회', '회'),
+    ('효', '효'),
+    ('후', '후'),
+    ('훠', '훠'),
+    ('훼', '훼'),
+    ('휘', '휘'),
+    ('휴', '휴'),
+    ('흐', '흐'),
+    ('희', '희'),
+    ('히', '히'),
+];
+
+pub const LVT: &'static [(char, char)] = &[
+    ('각', '갛'),
+    ('객', '갷'),
+    ('갹', '걓'),
+    ('걕', '걯'),
+    ('걱', '겋'),
+    ('겍', '겧'),
+    ('격', '곃'),
+    ('곅', '곟'),
+    ('곡', '곻'),
+    ('곽', '괗'),
+    ('괙', '괳'),
+    ('괵', '굏'),
+    ('굑', '굫'),
+    ('국', '궇'),
+    ('궉', '궣'),
+    ('궥', '궿'),
+    ('귁', '귛'),
+    ('귝', '귷'),
+    ('극', '긓'),
+    ('긕', '긯'),
+    ('긱', '깋'),
+    ('깍', '깧'),
+    ('깩', '꺃'),
+    ('꺅', '꺟'),
+    ('꺡', '꺻'),
+    ('꺽', '껗'),
+    ('껙', '껳'),
+    ('껵', '꼏'),
+    ('꼑', '꼫'),
+    ('꼭', '꽇'),
+    ('꽉', '꽣'),
+    ('꽥', '꽿'),
+    ('꾁', '꾛'),
+    ('꾝', '꾷'),
+    ('꾹', '꿓'),
+    ('꿕', '꿯'),
+    ('꿱', '뀋'),
+    ('뀍', '뀧'),
+    ('뀩', '끃'),
+    ('끅', '끟'),
+    ('끡', '끻'),
+    ('끽', '낗'),
+    ('낙', '낳'),
+    ('낵', '냏'),
+    ('냑', '냫'),
+    ('냭', '넇'),
+    ('넉', '넣'),
+    ('넥', '넿'),
+    ('녁', '녛'),
+    ('녝', '녷'),
+    ('녹', '놓'),
+    ('놕', '놯'),
+    ('놱', '뇋'),
+    ('뇍', '뇧'),
+    ('뇩', '눃'),
+    ('눅', '눟'),
+    ('눡', '눻'),
+    ('눽', '뉗'),
+    ('뉙', '뉳'),
+    ('뉵', '늏'),
+    ('늑', '늫'),
+    ('늭', '닇'),
+    ('닉', '닣'),
+    ('닥', '닿'),
+    ('댁', '댛'),
+    ('댝', '댷'),
+    ('댹', '덓'),
+    ('덕', '덯'),
+    ('덱', '뎋'),
+    ('뎍', '뎧'),
+    ('뎩', '돃'),
+    ('독', '돟'),
+    ('돡', '돻'),
+    ('돽', '됗'),
+    ('됙', '됳'),
+    ('됵', '둏'),
+    ('둑', '둫'),
+    ('둭', '뒇'),
+    ('뒉', '뒣'),
+    ('뒥', '뒿'),
+    ('듁', '듛'),
+    ('득', '듷'),
+    ('듹', '딓'),
+    ('딕', '딯'),
+    ('딱', '땋'),
+    ('땍', '땧'),
+    ('땩', '떃'),
+    ('떅', '떟'),
+    ('떡', '떻'),
+    ('떽', '뗗'),
+    ('뗙', '뗳'),
+    ('뗵', '똏'),
+    ('똑', '똫'),
+    ('똭', '뙇'),
+    ('뙉', '뙣'),
+    ('뙥', '뙿'),
+    ('뚁', '뚛'),
+    ('뚝', '뚷'),
+    ('뚹', '뛓'),
+    ('뛕', '뛯'),
+    ('뛱', '뜋'),
+    ('뜍', '뜧'),
+    ('뜩', '띃'),
+    ('띅', '띟'),
+    ('띡', '띻'),
+    ('락', '랗'),
+    ('랙', '랳'),
+    ('략', '럏'),
+    ('럑', '럫'),
+    ('럭', '렇'),
+    ('렉', '렣'),
+    ('력', '렿'),
+    ('롁', '롛'),
+    ('록', '롷'),
+    ('롹', '뢓'),
+    ('뢕', '뢯'),
+    ('뢱', '룋'),
+    ('룍', '룧'),
+    ('룩', '뤃'),
+    ('뤅', '뤟'),
+    ('뤡', '뤻'),
+    ('뤽', '륗'),
+    ('륙', '륳'),
+    ('륵', '릏'),
+    ('릑', '릫'),
+    ('릭', '맇'),
+    ('막', '맣'),
+    ('맥', '맿'),
+    ('먁', '먛'),
+    ('먝', '먷'),
+    ('먹', '멓'),
+    ('멕', '멯'),
+    ('멱', '몋'),
+    ('몍', '몧'),
+    ('목', '뫃'),
+    ('뫅', '뫟'),
+    ('뫡', '뫻'),
+    ('뫽', '묗'),
+    ('묙', '묳'),
+    ('묵', '뭏'),
+    ('뭑', '뭫'),
+    ('뭭', '뮇'),
+    ('뮉', '뮣'),
+    ('뮥', '뮿'),
+    ('믁', '믛'),
+    ('믝', '믷'),
+    ('믹', '밓'),
+    ('박', '밯'),
+    ('백', '뱋'),
+    ('뱍', '뱧'),
+    ('뱩', '벃'),
+    ('벅', '벟'),
+    ('벡', '벻'),
+    ('벽', '볗'),
+    ('볙', '볳'),
+    ('복', '봏'),
+    ('봑', '봫'),
+    ('봭', '뵇'),
+    ('뵉', '뵣'),
+    ('뵥', '뵿'),
+    ('북', '붛'),
+    ('붝', '붷'),
+    ('붹', '뷓'),
+    ('뷕', '뷯'),
+    ('뷱', '븋'),
+    ('븍', '븧'),
+    ('븩', '빃'),
+    ('빅', '빟'),
+    ('빡', '빻'),
+    ('빽', '뺗'),
+    ('뺙', '뺳'),
+    ('뺵', '뻏'),
+    ('뻑', '뻫'),
+    ('뻭', '뼇'),
+    ('뼉', '뼣'),
+    ('뼥', '뼿'),
+    ('뽁', '뽛'),
+    ('뽝', '뽷'),
+    ('뽹', '뾓'),
+    ('뾕', '뾯'),
+    ('뾱', '뿋'),
+    ('뿍', '뿧'),
+    ('뿩', '쀃'),
+    ('쀅', '쀟'),
+    ('쀡', '쀻'),
+    ('쀽', '쁗'),
+    ('쁙', '쁳'),
+    ('쁵', '삏'),
+    ('삑', '삫'),
+    ('삭', '샇'),
+    ('색', '샣'),
+    ('샥', '샿'),
+    ('섁', '섛'),
+    ('석', '섷'),
+    ('섹', '셓'),
+    ('셕', '셯'),
+    ('셱', '솋'),
+    ('속', '솧'),
+    ('솩', '쇃'),
+    ('쇅', '쇟'),
+    ('쇡', '쇻'),
+    ('쇽', '숗'),
+    ('숙', '숳'),
+    ('숵', '쉏'),
+    ('쉑', '쉫'),
+    ('쉭', '슇'),
+    ('슉', '슣'),
+    ('슥', '슿'),
+    ('싁', '싛'),
+    ('식', '싷'),
+    ('싹', '쌓'),
+    ('쌕', '쌯'),
+    ('쌱', '썋'),
+    ('썍', '썧'),
+    ('썩', '쎃'),
+    ('쎅', '쎟'),
+    ('쎡', '쎻'),
+    ('쎽', '쏗'),
+    ('쏙', '쏳'),
+    ('쏵', '쐏'),
+    ('쐑', '쐫'),
+    ('쐭', '쑇'),
+    ('쑉', '쑣'),
+    ('쑥', '쑿'),
+    ('쒁', '쒛'),
+    ('쒝', '쒷'),
+    ('쒹', '쓓'),
+    ('쓕', '쓯'),
+    ('쓱', '씋'),
+    ('씍', '씧'),
+    ('씩', '앃'),
+    ('악', '앟'),
+    ('액', '앻'),
+    ('약', '얗'),
+    ('얙', '얳'),
+    ('억', '엏'),
+    ('엑', '엫'),
+    ('역', '옇'),
+    ('옉', '옣'),
+    ('옥', '옿'),
+    ('왁', '왛'),
+    ('왝', '왷'),
+    ('왹', '욓'),
+    ('욕', '욯'),
+    ('욱', '웋'),
+    ('웍', '웧'),
+    ('웩', '윃'),
+    ('윅', '윟'),
+    ('육', '윻'),
+    ('윽', '읗'),
+    ('읙', '읳'),
+    ('익', '잏'),
+    ('작', '잫'),
+    ('잭', '쟇'),
+    ('쟉', '쟣'),
+    ('쟥', '쟿'),
+    ('적', '젛'),
+    ('젝', '젷'),
+    ('젹', '졓'),
+    ('졕', '졯'),
+    ('족', '좋'),
+    ('좍', '좧'),
+    ('좩', '죃'),
+    ('죅', '죟'),
+    ('죡', '죻'),
+    ('죽', '줗'),
+    ('줙', '줳'),
+    ('줵', '쥏'),
+    ('쥑', '쥫'),
+    ('쥭', '즇'),
+    ('즉', '즣'),
+    ('즥', '즿'),
+    ('직', '짛'),
+    ('짝', '짷'),
+    ('짹', '쨓'),
+    ('쨕', '쨯'),
+    ('쨱', '쩋'),
+    ('쩍', '쩧'),
+    ('쩩', '쪃'),
+    ('쪅', '쪟'),
+    ('쪡', '쪻'),
+    ('쪽', '쫗'),
+    ('쫙', '쫳'),
+    ('쫵', '쬏'),
+    ('쬑', '쬫'),
+    ('쬭', '쭇'),
+    ('쭉', '쭣'),
+    ('쭥', '쭿'),
+    ('쮁', '쮛'),
+    ('쮝', '쮷'),
+    ('쮹', '쯓'),
+    ('쯕', '쯯'),
+    ('쯱', '찋'),
+    ('찍', '찧'),
+    ('착', '챃'),
+    ('책', '챟'),
+    ('챡', '챻'),
+    ('챽', '첗'),
+    ('척', '첳'),
+    ('첵', '쳏'),
+    ('쳑', '쳫'),
+    ('쳭', '촇'),
+    ('촉', '촣'),
+    ('촥', '촿'),
+    ('쵁', '쵛'),
+    ('쵝', '쵷'),
+    ('쵹', '춓'),
+    ('축', '춯'),
+    ('춱', '췋'),
+    ('췍', '췧'),
+    ('췩', '츃'),
+    ('츅', '츟'),
+    ('측', '츻'),
+    ('츽', '칗'),
+    ('칙', '칳'),
+    ('칵', '캏'),
+    ('캑', '캫'),
+    ('캭', '컇'),
+    ('컉', '컣'),
+    ('컥', '컿'),
+    ('켁', '켛'),
+    ('켝', '켷'),
+    ('켹', '콓'),
+    ('콕', '콯'),
+    ('콱', '쾋'),
+    ('쾍', '쾧'),
+    ('쾩', '쿃'),
+    ('쿅', '쿟'),
+    ('쿡', '쿻'),
+    ('쿽', '퀗'),
+    ('퀙', '퀳'),
+    ('퀵', '큏'),
+    ('큑', '큫'),
+    ('큭', '킇'),
+    ('킉', '킣'),
+    ('킥', '킿'),
+    ('탁', '탛'),
+    ('택', '탷'),
+    ('탹', '턓'),
+    ('턕', '턯'),
+    ('턱', '텋'),
+    ('텍', '텧'),
+    ('텩', '톃'),
+    ('톅', '톟'),
+    ('톡', '톻'),
+    ('톽', '퇗'),
+    ('퇙', '퇳'),
+    ('퇵', '툏'),
+    ('툑', '툫'),
+    ('툭', '퉇'),
+    ('퉉', '퉣'),
+    ('퉥', '퉿'),
+    ('튁', '튛'),
+    ('튝', '튷'),
+    ('특', '틓'),
+    ('틕', '틯'),
+    ('틱', '팋'),
+    ('팍', '팧'),
+    ('팩', '퍃'),
+    ('퍅', '퍟'),
+    ('퍡', '퍻'),
+    ('퍽', '펗'),
+    ('펙', '펳'),
+    ('펵', '폏'),
+    ('폑', '폫'),
+    ('폭', '퐇'),
+    ('퐉', '퐣'),
+    ('퐥', '퐿'),
+    ('푁', '푛'),
+    ('푝', '푷'),
+    ('푹', '풓'),
+    ('풕', '풯'),
+    ('풱', '퓋'),
+    ('퓍', '퓧'),
+    ('퓩', '픃'),
+    ('픅', '픟'),
+    ('픡', '픻'),
+    ('픽', '핗'),
+    ('학', '핳'),
+    ('핵', '햏'),
+    ('햑', '햫'),
+    ('햭', '헇'),
+    ('헉', '헣'),
+    ('헥', '헿'),
+    ('혁', '혛'),
+    ('혝', '혷'),
+    ('혹', '홓'),
+    ('확', '홯'),
+    ('홱', '횋'),
+    ('획', '횧'),
+    ('횩', '훃'),
+    ('훅', '훟'),
+    ('훡', '훻'),
+    ('훽', '휗'),
+    ('휙', '휳'),
+    ('휵', '흏'),
+    ('흑', '흫'),
+    ('흭', '힇'),
+    ('힉', '힣'),
+];
+
+pub const PREPEND: &'static [(char, char)] = &[
+    ('\u{600}', '\u{605}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('ൎ', 'ൎ'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('𑇂', '𑇃'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑨺', '𑨺'),
+    ('𑪄', '𑪉'),
+    ('𑵆', '𑵆'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
+
+pub const SPACINGMARK: &'static [(char, char)] = &[
+    ('ः', 'ः'),
+    ('ऻ', 'ऻ'),
+    ('ा', 'ी'),
+    ('ॉ', 'ौ'),
+    ('ॎ', 'ॏ'),
+    ('ং', 'ঃ'),
+    ('ি', 'ী'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৌ'),
+    ('ਃ', 'ਃ'),
+    ('ਾ', 'ੀ'),
+    ('ઃ', 'ઃ'),
+    ('ા', 'ી'),
+    ('ૉ', 'ૉ'),
+    ('ો', 'ૌ'),
+    ('ଂ', 'ଃ'),
+    ('ୀ', 'ୀ'),
+    ('େ', 'ୈ'),
+    ('ୋ', 'ୌ'),
+    ('ி', 'ி'),
+    ('ு', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', 'ௌ'),
+    ('ఁ', 'ః'),
+    ('ు', 'ౄ'),
+    ('ಂ', 'ಃ'),
+    ('ಾ', 'ಾ'),
+    ('ೀ', 'ು'),
+    ('ೃ', 'ೄ'),
+    ('ೇ', 'ೈ'),
+    ('ೊ', 'ೋ'),
+    ('ം', 'ഃ'),
+    ('ി', 'ീ'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൌ'),
+    ('ං', 'ඃ'),
+    ('ැ', 'ෑ'),
+    ('ෘ', 'ෞ'),
+    ('ෲ', 'ෳ'),
+    ('ำ', 'ำ'),
+    ('ຳ', 'ຳ'),
+    ('༾', '༿'),
+    ('ཿ', 'ཿ'),
+    ('ေ', 'ေ'),
+    ('ျ', 'ြ'),
+    ('ၖ', 'ၗ'),
+    ('ႄ', 'ႄ'),
+    ('ា', 'ា'),
+    ('ើ', 'ៅ'),
+    ('ះ', 'ៈ'),
+    ('ᤣ', 'ᤦ'),
+    ('ᤩ', 'ᤫ'),
+    ('ᤰ', 'ᤱ'),
+    ('ᤳ', 'ᤸ'),
+    ('ᨙ', 'ᨚ'),
+    ('ᩕ', 'ᩕ'),
+    ('ᩗ', 'ᩗ'),
+    ('ᩭ', 'ᩲ'),
+    ('ᬄ', 'ᬄ'),
+    ('ᬻ', 'ᬻ'),
+    ('ᬽ', 'ᭁ'),
+    ('ᭃ', '᭄'),
+    ('ᮂ', 'ᮂ'),
+    ('ᮡ', 'ᮡ'),
+    ('ᮦ', 'ᮧ'),
+    ('᮪', '᮪'),
+    ('ᯧ', 'ᯧ'),
+    ('ᯪ', 'ᯬ'),
+    ('ᯮ', 'ᯮ'),
+    ('᯲', '᯳'),
+    ('ᰤ', 'ᰫ'),
+    ('ᰴ', 'ᰵ'),
+    ('᳡', '᳡'),
+    ('᳷', '᳷'),
+    ('ꠣ', 'ꠤ'),
+    ('ꠧ', 'ꠧ'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', 'ꣃ'),
+    ('ꥒ', '꥓'),
+    ('ꦃ', 'ꦃ'),
+    ('ꦴ', 'ꦵ'),
+    ('ꦺ', 'ꦻ'),
+    ('ꦾ', '꧀'),
+    ('ꨯ', 'ꨰ'),
+    ('ꨳ', 'ꨴ'),
+    ('ꩍ', 'ꩍ'),
+    ('ꫫ', 'ꫫ'),
+    ('ꫮ', 'ꫯ'),
+    ('ꫵ', 'ꫵ'),
+    ('ꯣ', 'ꯤ'),
+    ('ꯦ', 'ꯧ'),
+    ('ꯩ', 'ꯪ'),
+    ('꯬', '꯬'),
+    ('𑀀', '𑀀'),
+    ('𑀂', '𑀂'),
+    ('𑂂', '𑂂'),
+    ('𑂰', '𑂲'),
+    ('𑂷', '𑂸'),
+    ('𑄬', '𑄬'),
+    ('𑅅', '𑅆'),
+    ('𑆂', '𑆂'),
+    ('𑆳', '𑆵'),
+    ('𑆿', '𑇀'),
+    ('\u{111ce}', '\u{111ce}'),
+    ('𑈬', '𑈮'),
+    ('𑈲', '𑈳'),
+    ('𑈵', '𑈵'),
+    ('𑋠', '𑋢'),
+    ('𑌂', '𑌃'),
+    ('𑌿', '𑌿'),
+    ('𑍁', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍢', '𑍣'),
+    ('𑐵', '𑐷'),
+    ('𑑀', '𑑁'),
+    ('𑑅', '𑑅'),
+    ('𑒱', '𑒲'),
+    ('𑒹', '𑒹'),
+    ('𑒻', '𑒼'),
+    ('𑒾', '𑒾'),
+    ('𑓁', '𑓁'),
+    ('𑖰', '𑖱'),
+    ('𑖸', '𑖻'),
+    ('𑖾', '𑖾'),
+    ('𑘰', '𑘲'),
+    ('𑘻', '𑘼'),
+    ('𑘾', '𑘾'),
+    ('𑚬', '𑚬'),
+    ('𑚮', '𑚯'),
+    ('𑚶', '𑚶'),
+    ('𑜠', '𑜡'),
+    ('𑜦', '𑜦'),
+    ('𑠬', '𑠮'),
+    ('𑠸', '𑠸'),
+    ('\u{11931}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193d}', '\u{1193d}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11942}'),
+    ('𑧑', '𑧓'),
+    ('𑧜', '𑧟'),
+    ('𑧤', '𑧤'),
+    ('𑨹', '𑨹'),
+    ('𑩗', '𑩘'),
+    ('𑪗', '𑪗'),
+    ('𑰯', '𑰯'),
+    ('𑰾', '𑰾'),
+    ('𑲩', '𑲩'),
+    ('𑲱', '𑲱'),
+    ('𑲴', '𑲴'),
+    ('𑶊', '𑶎'),
+    ('𑶓', '𑶔'),
+    ('𑶖', '𑶖'),
+    ('𑻵', '𑻶'),
+    ('𖽑', '𖾇'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𝅦', '𝅦'),
+    ('𝅭', '𝅭'),
+];
+
+pub const T: &'static [(char, char)] = &[('ᆨ', 'ᇿ'), ('ퟋ', 'ퟻ')];
+
+pub const V: &'static [(char, char)] = &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ')];
+
+pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/src/unicode_tables/mod.rs b/src/unicode_tables/mod.rs
new file mode 100644
index 0000000..20736c7
--- /dev/null
+++ b/src/unicode_tables/mod.rs
@@ -0,0 +1,57 @@
+#[cfg(feature = "unicode-age")]
+pub mod age;
+
+#[cfg(feature = "unicode-case")]
+pub mod case_folding_simple;
+
+#[cfg(feature = "unicode-gencat")]
+pub mod general_category;
+
+#[cfg(feature = "unicode-segment")]
+pub mod grapheme_cluster_break;
+
+#[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
+#[allow(dead_code)]
+pub mod perl_decimal;
+
+#[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
+#[allow(dead_code)]
+pub mod perl_space;
+
+#[cfg(feature = "unicode-perl")]
+pub mod perl_word;
+
+#[cfg(feature = "unicode-bool")]
+pub mod property_bool;
+
+#[cfg(any(
+    feature = "unicode-age",
+    feature = "unicode-bool",
+    feature = "unicode-gencat",
+    feature = "unicode-perl",
+    feature = "unicode-script",
+    feature = "unicode-segment",
+))]
+pub mod property_names;
+
+#[cfg(any(
+    feature = "unicode-age",
+    feature = "unicode-bool",
+    feature = "unicode-gencat",
+    feature = "unicode-perl",
+    feature = "unicode-script",
+    feature = "unicode-segment",
+))]
+pub mod property_values;
+
+#[cfg(feature = "unicode-script")]
+pub mod script;
+
+#[cfg(feature = "unicode-script")]
+pub mod script_extension;
+
+#[cfg(feature = "unicode-segment")]
+pub mod sentence_break;
+
+#[cfg(feature = "unicode-segment")]
+pub mod word_break;
diff --git a/src/unicode_tables/perl_decimal.rs b/src/unicode_tables/perl_decimal.rs
new file mode 100644
index 0000000..541f324
--- /dev/null
+++ b/src/unicode_tables/perl_decimal.rs
@@ -0,0 +1,72 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate general-category ucd-13.0.0 --chars --include decimalnumber
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
+    &[("Decimal_Number", DECIMAL_NUMBER)];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('٠', '٩'),
+    ('۰', '۹'),
+    ('߀', '߉'),
+    ('०', '९'),
+    ('০', '৯'),
+    ('੦', '੯'),
+    ('૦', '૯'),
+    ('୦', '୯'),
+    ('௦', '௯'),
+    ('౦', '౯'),
+    ('೦', '೯'),
+    ('൦', '൯'),
+    ('෦', '෯'),
+    ('๐', '๙'),
+    ('໐', '໙'),
+    ('༠', '༩'),
+    ('၀', '၉'),
+    ('႐', '႙'),
+    ('០', '៩'),
+    ('᠐', '᠙'),
+    ('᥆', '᥏'),
+    ('᧐', '᧙'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᭐', '᭙'),
+    ('᮰', '᮹'),
+    ('᱀', '᱉'),
+    ('᱐', '᱙'),
+    ('꘠', '꘩'),
+    ('꣐', '꣙'),
+    ('꤀', '꤉'),
+    ('꧐', '꧙'),
+    ('꧰', '꧹'),
+    ('꩐', '꩙'),
+    ('꯰', '꯹'),
+    ('0', '9'),
+    ('𐒠', '𐒩'),
+    ('𐴰', '𐴹'),
+    ('𑁦', '𑁯'),
+    ('𑃰', '𑃹'),
+    ('𑄶', '𑄿'),
+    ('𑇐', '𑇙'),
+    ('𑋰', '𑋹'),
+    ('𑑐', '𑑙'),
+    ('𑓐', '𑓙'),
+    ('𑙐', '𑙙'),
+    ('𑛀', '𑛉'),
+    ('𑜰', '𑜹'),
+    ('𑣠', '𑣩'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑱐', '𑱙'),
+    ('𑵐', '𑵙'),
+    ('𑶠', '𑶩'),
+    ('𖩠', '𖩩'),
+    ('𖭐', '𖭙'),
+    ('𝟎', '𝟿'),
+    ('𞅀', '𞅉'),
+    ('𞋰', '𞋹'),
+    ('𞥐', '𞥙'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+];
diff --git a/src/unicode_tables/perl_space.rs b/src/unicode_tables/perl_space.rs
new file mode 100644
index 0000000..8f048f8
--- /dev/null
+++ b/src/unicode_tables/perl_space.rs
@@ -0,0 +1,21 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-bool ucd-13.0.0 --chars --include whitespace
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
+    &[("White_Space", WHITE_SPACE)];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+    ('\t', '\r'),
+    (' ', ' '),
+    ('\u{85}', '\u{85}'),
+    ('\u{a0}', '\u{a0}'),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{200a}'),
+    ('\u{2028}', '\u{2029}'),
+    ('\u{202f}', '\u{202f}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
diff --git a/src/unicode_tables/perl_word.rs b/src/unicode_tables/perl_word.rs
new file mode 100644
index 0000000..68e9451
--- /dev/null
+++ b/src/unicode_tables/perl_word.rs
@@ -0,0 +1,741 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate perl-word ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const PERL_WORD: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('A', 'Z'),
+    ('_', '_'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('\u{300}', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('\u{483}', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('\u{610}', '\u{61a}'),
+    ('ؠ', '٩'),
+    ('ٮ', 'ۓ'),
+    ('ە', '\u{6dc}'),
+    ('\u{6df}', '\u{6e8}'),
+    ('\u{6ea}', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', '\u{74a}'),
+    ('ݍ', 'ޱ'),
+    ('߀', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('ࠀ', '\u{82d}'),
+    ('ࡀ', '\u{85b}'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{963}'),
+    ('०', '९'),
+    ('ॱ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('૦', '૯'),
+    ('ૹ', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('\u{b3c}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('୦', '୯'),
+    ('ୱ', 'ୱ'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௦', '௯'),
+    ('\u{c00}', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('౦', '౯'),
+    ('ಀ', 'ಃ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('\u{cbc}', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൎ'),
+    ('ൔ', '\u{d57}'),
+    ('ൟ', '\u{d63}'),
+    ('൦', '൯'),
+    ('ൺ', 'ൿ'),
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('෦', '෯'),
+    ('ෲ', 'ෳ'),
+    ('ก', '\u{e3a}'),
+    ('เ', '\u{e4e}'),
+    ('๐', '๙'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('\u{f18}', '\u{f19}'),
+    ('༠', '༩'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('က', '၉'),
+    ('ၐ', '\u{109d}'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('\u{135d}', '\u{135f}'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', '\u{1714}'),
+    ('ᜠ', '\u{1734}'),
+    ('ᝀ', '\u{1753}'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('\u{1772}', '\u{1773}'),
+    ('ក', '\u{17d3}'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', '\u{17dd}'),
+    ('០', '៩'),
+    ('\u{180b}', '\u{180d}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥆', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('᧐', '᧙'),
+    ('ᨀ', '\u{1a1b}'),
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᭋ'),
+    ('᭐', '᭙'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '᯳'),
+    ('ᰀ', '\u{1c37}'),
+    ('᱀', '᱉'),
+    ('ᱍ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', 'ᳺ'),
+    ('ᴀ', '\u{1df9}'),
+    ('\u{1dfb}', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('\u{200c}', '\u{200d}'),
+    ('‿', '⁀'),
+    ('⁔', '⁔'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⓐ', 'ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('\u{2d7f}', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '〇'),
+    ('〡', '\u{302f}'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('\u{3099}', '\u{309a}'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘫ'),
+    ('Ꙁ', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('ꙿ', '\u{a6f1}'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢀ', '\u{a8c5}'),
+    ('꣐', '꣙'),
+    ('\u{a8e0}', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', '\u{a92d}'),
+    ('ꤰ', '꥓'),
+    ('ꥠ', 'ꥼ'),
+    ('\u{a980}', '꧀'),
+    ('ꧏ', '꧙'),
+    ('ꧠ', 'ꧾ'),
+    ('ꨀ', '\u{aa36}'),
+    ('ꩀ', 'ꩍ'),
+    ('꩐', '꩙'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫯ'),
+    ('ꫲ', '\u{aaf6}'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('꯰', '꯹'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('︳', '︴'),
+    ('﹍', '﹏'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('0', '9'),
+    ('A', 'Z'),
+    ('_', '_'),
+    ('a', 'z'),
+    ('ヲ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '\u{1037a}'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒠', '𐒩'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '\u{10ae6}'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '\u{10d27}'),
+    ('𐴰', '𐴹'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '\u{10f50}'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀀', '\u{11046}'),
+    ('𑁦', '𑁯'),
+    ('\u{1107f}', '\u{110ba}'),
+    ('𑃐', '𑃨'),
+    ('𑃰', '𑃹'),
+    ('\u{11100}', '\u{11134}'),
+    ('𑄶', '𑄿'),
+    ('𑅄', '\u{11147}'),
+    ('𑅐', '\u{11173}'),
+    ('𑅶', '𑅶'),
+    ('\u{11180}', '𑇄'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '\u{112ea}'),
+    ('𑋰', '𑋹'),
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133b}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐀', '𑑊'),
+    ('𑑐', '𑑙'),
+    ('\u{1145e}', '\u{11461}'),
+    ('𑒀', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑓐', '𑓙'),
+    ('𑖀', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('𑗘', '\u{115dd}'),
+    ('𑘀', '\u{11640}'),
+    ('𑙄', '𑙄'),
+    ('𑙐', '𑙙'),
+    ('𑚀', '𑚸'),
+    ('𑛀', '𑛉'),
+    ('𑜀', '𑜚'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑜰', '𑜹'),
+    ('𑠀', '\u{1183a}'),
+    ('𑢠', '𑣩'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11943}'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧡'),
+    ('𑧣', '𑧤'),
+    ('𑨀', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('𑩐', '\u{11a99}'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '\u{11c36}'),
+    ('\u{11c38}', '𑱀'),
+    ('𑱐', '𑱙'),
+    ('𑱲', '𑲏'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+    ('𑻠', '𑻶'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖩠', '𖩩'),
+    ('𖫐', '𖫭'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('𖬀', '\u{16b36}'),
+    ('𖭀', '𖭃'),
+    ('𖭐', '𖭙'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('\u{16f4f}', '𖾇'),
+    ('\u{16f8f}', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('𞄀', '𞄬'),
+    ('\u{1e130}', '𞄽'),
+    ('𞅀', '𞅉'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋹'),
+    ('𞠀', '𞣄'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('𞤀', '𞥋'),
+    ('𞥐', '𞥙'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
diff --git a/src/unicode_tables/property_bool.rs b/src/unicode_tables/property_bool.rs
new file mode 100644
index 0000000..a0ef0cf
--- /dev/null
+++ b/src/unicode_tables/property_bool.rs
@@ -0,0 +1,10951 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-bool ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("ASCII_Hex_Digit", ASCII_HEX_DIGIT),
+    ("Alphabetic", ALPHABETIC),
+    ("Bidi_Control", BIDI_CONTROL),
+    ("Bidi_Mirrored", BIDI_MIRRORED),
+    ("Case_Ignorable", CASE_IGNORABLE),
+    ("Cased", CASED),
+    ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED),
+    ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED),
+    ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED),
+    ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED),
+    ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED),
+    ("Dash", DASH),
+    ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT),
+    ("Deprecated", DEPRECATED),
+    ("Diacritic", DIACRITIC),
+    ("Emoji", EMOJI),
+    ("Emoji_Component", EMOJI_COMPONENT),
+    ("Emoji_Modifier", EMOJI_MODIFIER),
+    ("Emoji_Modifier_Base", EMOJI_MODIFIER_BASE),
+    ("Emoji_Presentation", EMOJI_PRESENTATION),
+    ("Extended_Pictographic", EXTENDED_PICTOGRAPHIC),
+    ("Extender", EXTENDER),
+    ("Grapheme_Base", GRAPHEME_BASE),
+    ("Grapheme_Extend", GRAPHEME_EXTEND),
+    ("Grapheme_Link", GRAPHEME_LINK),
+    ("Hex_Digit", HEX_DIGIT),
+    ("Hyphen", HYPHEN),
+    ("IDS_Binary_Operator", IDS_BINARY_OPERATOR),
+    ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR),
+    ("ID_Continue", ID_CONTINUE),
+    ("ID_Start", ID_START),
+    ("Ideographic", IDEOGRAPHIC),
+    ("Join_Control", JOIN_CONTROL),
+    ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION),
+    ("Lowercase", LOWERCASE),
+    ("Math", MATH),
+    ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT),
+    ("Other_Alphabetic", OTHER_ALPHABETIC),
+    ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT),
+    ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND),
+    ("Other_ID_Continue", OTHER_ID_CONTINUE),
+    ("Other_ID_Start", OTHER_ID_START),
+    ("Other_Lowercase", OTHER_LOWERCASE),
+    ("Other_Math", OTHER_MATH),
+    ("Other_Uppercase", OTHER_UPPERCASE),
+    ("Pattern_Syntax", PATTERN_SYNTAX),
+    ("Pattern_White_Space", PATTERN_WHITE_SPACE),
+    ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK),
+    ("Quotation_Mark", QUOTATION_MARK),
+    ("Radical", RADICAL),
+    ("Regional_Indicator", REGIONAL_INDICATOR),
+    ("Sentence_Terminal", SENTENCE_TERMINAL),
+    ("Soft_Dotted", SOFT_DOTTED),
+    ("Terminal_Punctuation", TERMINAL_PUNCTUATION),
+    ("Unified_Ideograph", UNIFIED_IDEOGRAPH),
+    ("Uppercase", UPPERCASE),
+    ("Variation_Selector", VARIATION_SELECTOR),
+    ("White_Space", WHITE_SPACE),
+    ("XID_Continue", XID_CONTINUE),
+    ("XID_Start", XID_START),
+];
+
+pub const ASCII_HEX_DIGIT: &'static [(char, char)] =
+    &[('0', '9'), ('A', 'F'), ('a', 'f')];
+
+pub const ALPHABETIC: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('\u{345}', '\u{345}'),
+    ('Ͱ', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('\u{5b0}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('\u{610}', '\u{61a}'),
+    ('ؠ', '\u{657}'),
+    ('\u{659}', '\u{65f}'),
+    ('ٮ', 'ۓ'),
+    ('ە', '\u{6dc}'),
+    ('\u{6e1}', '\u{6e8}'),
+    ('\u{6ed}', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', '\u{73f}'),
+    ('ݍ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', '\u{817}'),
+    ('ࠚ', '\u{82c}'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d4}', '\u{8df}'),
+    ('\u{8e3}', '\u{8e9}'),
+    ('\u{8f0}', 'ऻ'),
+    ('ऽ', 'ौ'),
+    ('ॎ', 'ॐ'),
+    ('\u{955}', '\u{963}'),
+    ('ॱ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৌ'),
+    ('ৎ', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4c}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('\u{a70}', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', 'ૌ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('ૹ', '\u{afc}'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', 'ୌ'),
+    ('\u{b56}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('ୱ', 'ୱ'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', 'ௌ'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', 'ః'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4c}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('ಀ', 'ಃ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccc}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('ೱ', 'ೲ'),
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൌ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', '\u{d57}'),
+    ('ൟ', '\u{d63}'),
+    ('ൺ', 'ൿ'),
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('ก', '\u{e3a}'),
+    ('เ', 'ๆ'),
+    ('\u{e4d}', '\u{e4d}'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', '\u{eb9}'),
+    ('\u{ebb}', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ecd}', '\u{ecd}'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f81}'),
+    ('ྈ', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('က', '\u{1036}'),
+    ('း', 'း'),
+    ('ျ', 'ဿ'),
+    ('ၐ', 'ႏ'),
+    ('ႚ', '\u{109d}'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', '\u{1713}'),
+    ('ᜠ', '\u{1733}'),
+    ('ᝀ', '\u{1753}'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('\u{1772}', '\u{1773}'),
+    ('ក', 'ឳ'),
+    ('ា', 'ៈ'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', 'ᤸ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', '\u{1a1b}'),
+    ('ᨠ', '\u{1a5e}'),
+    ('ᩡ', '\u{1a74}'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᬳ'),
+    ('\u{1b35}', 'ᭃ'),
+    ('ᭅ', 'ᭋ'),
+    ('\u{1b80}', '\u{1ba9}'),
+    ('\u{1bac}', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᯧ', '\u{1bf1}'),
+    ('ᰀ', '\u{1c36}'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('\u{1de7}', '\u{1df4}'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⓐ', 'ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '〇'),
+    ('〡', '〩'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('\u{a674}', '\u{a67b}'),
+    ('ꙿ', 'ꛯ'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠅ'),
+    ('ꠇ', 'ꠧ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢀ', 'ꣃ'),
+    ('\u{a8c5}', '\u{a8c5}'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', '\u{a8ff}'),
+    ('ꤊ', '\u{a92a}'),
+    ('ꤰ', 'ꥒ'),
+    ('ꥠ', 'ꥼ'),
+    ('\u{a980}', 'ꦲ'),
+    ('ꦴ', 'ꦿ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧠ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', '\u{aa36}'),
+    ('ꩀ', 'ꩍ'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', '\u{aabe}'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫯ'),
+    ('ꫲ', 'ꫵ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯪ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ヲ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '\u{1037a}'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '\u{10d27}'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀀', '\u{11045}'),
+    ('𑂂', '𑂸'),
+    ('𑃐', '𑃨'),
+    ('\u{11100}', '\u{11132}'),
+    ('𑅄', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('\u{11180}', '𑆿'),
+    ('𑇁', '𑇄'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '\u{11234}'),
+    ('\u{11237}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '\u{112e8}'),
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍌'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('𑐀', '𑑁'),
+    ('\u{11443}', '𑑅'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑓁'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '\u{115b5}'),
+    ('𑖸', '𑖾'),
+    ('𑗘', '\u{115dd}'),
+    ('𑘀', '𑘾'),
+    ('\u{11640}', '\u{11640}'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '\u{116b5}'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('\u{1171d}', '\u{1172a}'),
+    ('𑠀', '𑠸'),
+    ('𑢠', '𑣟'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{1193f}', '\u{11942}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧟'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧤'),
+    ('𑨀', '𑨲'),
+    ('\u{11a35}', '\u{11a3e}'),
+    ('𑩐', '𑪗'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '\u{11c36}'),
+    ('\u{11c38}', '𑰾'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d41}'),
+    ('\u{11d43}', '\u{11d43}'),
+    ('𑵆', '\u{11d47}'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶖'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻶'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('\u{16f4f}', '𖾇'),
+    ('\u{16f8f}', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('\u{1bc9e}', '\u{1bc9e}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞤀', '𞥃'),
+    ('\u{1e947}', '\u{1e947}'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const BIDI_CONTROL: &'static [(char, char)] = &[
+    ('\u{61c}', '\u{61c}'),
+    ('\u{200e}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2066}', '\u{2069}'),
+];
+
+pub const BIDI_MIRRORED: &'static [(char, char)] = &[
+    ('(', ')'),
+    ('<', '<'),
+    ('>', '>'),
+    ('[', '['),
+    (']', ']'),
+    ('{', '{'),
+    ('}', '}'),
+    ('«', '«'),
+    ('»', '»'),
+    ('༺', '༽'),
+    ('᚛', '᚜'),
+    ('‹', '›'),
+    ('⁅', '⁆'),
+    ('⁽', '⁾'),
+    ('₍', '₎'),
+    ('⅀', '⅀'),
+    ('∁', '∄'),
+    ('∈', '∍'),
+    ('∑', '∑'),
+    ('∕', '∖'),
+    ('√', '∝'),
+    ('∟', '∢'),
+    ('∤', '∤'),
+    ('∦', '∦'),
+    ('∫', '∳'),
+    ('∹', '∹'),
+    ('∻', '≌'),
+    ('≒', '≕'),
+    ('≟', '≠'),
+    ('≢', '≢'),
+    ('≤', '≫'),
+    ('≮', '⊌'),
+    ('⊏', '⊒'),
+    ('⊘', '⊘'),
+    ('⊢', '⊣'),
+    ('⊦', '⊸'),
+    ('⊾', '⊿'),
+    ('⋉', '⋍'),
+    ('⋐', '⋑'),
+    ('⋖', '⋭'),
+    ('⋰', '⋿'),
+    ('⌈', '⌋'),
+    ('⌠', '⌡'),
+    ('⟨', '⟩'),
+    ('❨', '❵'),
+    ('⟀', '⟀'),
+    ('⟃', '⟆'),
+    ('⟈', '⟉'),
+    ('⟋', '⟍'),
+    ('⟓', '⟖'),
+    ('⟜', '⟞'),
+    ('⟢', '⟯'),
+    ('⦃', '⦘'),
+    ('⦛', '⦠'),
+    ('⦢', '⦯'),
+    ('⦸', '⦸'),
+    ('⧀', '⧅'),
+    ('⧉', '⧉'),
+    ('⧎', '⧒'),
+    ('⧔', '⧕'),
+    ('⧘', '⧜'),
+    ('⧡', '⧡'),
+    ('⧣', '⧥'),
+    ('⧨', '⧩'),
+    ('⧴', '⧹'),
+    ('⧼', '⧽'),
+    ('⨊', '⨜'),
+    ('⨞', '⨡'),
+    ('⨤', '⨤'),
+    ('⨦', '⨦'),
+    ('⨩', '⨩'),
+    ('⨫', '⨮'),
+    ('⨴', '⨵'),
+    ('⨼', '⨾'),
+    ('⩗', '⩘'),
+    ('⩤', '⩥'),
+    ('⩪', '⩭'),
+    ('⩯', '⩰'),
+    ('⩳', '⩴'),
+    ('⩹', '⪣'),
+    ('⪦', '⪭'),
+    ('⪯', '⫖'),
+    ('⫝̸', '⫝̸'),
+    ('⫞', '⫞'),
+    ('⫢', '⫦'),
+    ('⫬', '⫮'),
+    ('⫳', '⫳'),
+    ('⫷', '⫻'),
+    ('⫽', '⫽'),
+    ('⯾', '⯾'),
+    ('⸂', '⸅'),
+    ('⸉', '⸊'),
+    ('⸌', '⸍'),
+    ('⸜', '⸝'),
+    ('⸠', '⸩'),
+    ('〈', '】'),
+    ('〔', '〛'),
+    ('﹙', '﹞'),
+    ('﹤', '﹥'),
+    ('(', ')'),
+    ('<', '<'),
+    ('>', '>'),
+    ('[', '['),
+    (']', ']'),
+    ('{', '{'),
+    ('}', '}'),
+    ('⦅', '⦆'),
+    ('「', '」'),
+    ('𝛛', '𝛛'),
+    ('𝜕', '𝜕'),
+    ('𝝏', '𝝏'),
+    ('𝞉', '𝞉'),
+    ('𝟃', '𝟃'),
+];
+
+pub const CASE_IGNORABLE: &'static [(char, char)] = &[
+    ('\'', '\''),
+    ('.', '.'),
+    (':', ':'),
+    ('^', '^'),
+    ('`', '`'),
+    ('¨', '¨'),
+    ('\u{ad}', '\u{ad}'),
+    ('¯', '¯'),
+    ('´', '´'),
+    ('·', '¸'),
+    ('ʰ', '\u{36f}'),
+    ('ʹ', '͵'),
+    ('ͺ', 'ͺ'),
+    ('΄', '΅'),
+    ('·', '·'),
+    ('\u{483}', '\u{489}'),
+    ('ՙ', 'ՙ'),
+    ('՟', '՟'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('״', '״'),
+    ('\u{600}', '\u{605}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{61c}', '\u{61c}'),
+    ('ـ', 'ـ'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dd}'),
+    ('\u{6df}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{902}'),
+    ('\u{93a}', '\u{93a}'),
+    ('\u{93c}', '\u{93c}'),
+    ('\u{941}', '\u{948}'),
+    ('\u{94d}', '\u{94d}'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('ॱ', 'ॱ'),
+    ('\u{981}', '\u{981}'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9c1}', '\u{9c4}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', '\u{a02}'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('\u{a41}', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', '\u{a82}'),
+    ('\u{abc}', '\u{abc}'),
+    ('\u{ac1}', '\u{ac5}'),
+    ('\u{ac7}', '\u{ac8}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', '\u{b01}'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3f}', '\u{b3f}'),
+    ('\u{b41}', '\u{b44}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{b55}', '\u{b56}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bc0}', '\u{bc0}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{c00}', '\u{c00}'),
+    ('\u{c04}', '\u{c04}'),
+    ('\u{c3e}', '\u{c40}'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', '\u{c81}'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('\u{cbf}', '\u{cbf}'),
+    ('\u{cc6}', '\u{cc6}'),
+    ('\u{ccc}', '\u{ccd}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', '\u{d01}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d41}', '\u{d44}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', '\u{d81}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dd2}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('ๆ', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('\u{f71}', '\u{f7e}'),
+    ('\u{f80}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('\u{102d}', '\u{1030}'),
+    ('\u{1032}', '\u{1037}'),
+    ('\u{1039}', '\u{103a}'),
+    ('\u{103d}', '\u{103e}'),
+    ('\u{1058}', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{1082}'),
+    ('\u{1085}', '\u{1086}'),
+    ('\u{108d}', '\u{108d}'),
+    ('\u{109d}', '\u{109d}'),
+    ('ჼ', 'ჼ'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{17b7}', '\u{17bd}'),
+    ('\u{17c6}', '\u{17c6}'),
+    ('\u{17c9}', '\u{17d3}'),
+    ('ៗ', 'ៗ'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180e}'),
+    ('ᡃ', 'ᡃ'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', '\u{1922}'),
+    ('\u{1927}', '\u{1928}'),
+    ('\u{1932}', '\u{1932}'),
+    ('\u{1939}', '\u{193b}'),
+    ('\u{1a17}', '\u{1a18}'),
+    ('\u{1a1b}', '\u{1a1b}'),
+    ('\u{1a56}', '\u{1a56}'),
+    ('\u{1a58}', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a60}'),
+    ('\u{1a62}', '\u{1a62}'),
+    ('\u{1a65}', '\u{1a6c}'),
+    ('\u{1a73}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', '\u{1b03}'),
+    ('\u{1b34}', '\u{1b34}'),
+    ('\u{1b36}', '\u{1b3a}'),
+    ('\u{1b3c}', '\u{1b3c}'),
+    ('\u{1b42}', '\u{1b42}'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '\u{1b81}'),
+    ('\u{1ba2}', '\u{1ba5}'),
+    ('\u{1ba8}', '\u{1ba9}'),
+    ('\u{1bab}', '\u{1bad}'),
+    ('\u{1be6}', '\u{1be6}'),
+    ('\u{1be8}', '\u{1be9}'),
+    ('\u{1bed}', '\u{1bed}'),
+    ('\u{1bef}', '\u{1bf1}'),
+    ('\u{1c2c}', '\u{1c33}'),
+    ('\u{1c36}', '\u{1c37}'),
+    ('ᱸ', 'ᱽ'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce0}'),
+    ('\u{1ce2}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('ᴬ', 'ᵪ'),
+    ('ᵸ', 'ᵸ'),
+    ('ᶛ', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('᾽', '᾽'),
+    ('᾿', '῁'),
+    ('῍', '῏'),
+    ('῝', '῟'),
+    ('῭', '`'),
+    ('´', '῾'),
+    ('\u{200b}', '\u{200f}'),
+    ('‘', '’'),
+    ('․', '․'),
+    ('‧', '‧'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{2064}'),
+    ('\u{2066}', '\u{206f}'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('ⱼ', 'ⱽ'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('ⵯ', 'ⵯ'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '々'),
+    ('\u{302a}', '\u{302d}'),
+    ('〱', '〵'),
+    ('〻', '〻'),
+    ('\u{3099}', 'ゞ'),
+    ('ー', 'ヾ'),
+    ('ꀕ', 'ꀕ'),
+    ('ꓸ', 'ꓽ'),
+    ('ꘌ', 'ꘌ'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('ꙿ', 'ꙿ'),
+    ('ꚜ', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('꜀', '꜡'),
+    ('ꝰ', 'ꝰ'),
+    ('ꞈ', '꞊'),
+    ('ꟸ', 'ꟹ'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('\u{a825}', '\u{a826}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{a8c4}', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '\u{a951}'),
+    ('\u{a980}', '\u{a982}'),
+    ('\u{a9b3}', '\u{a9b3}'),
+    ('\u{a9b6}', '\u{a9b9}'),
+    ('\u{a9bc}', '\u{a9bd}'),
+    ('ꧏ', 'ꧏ'),
+    ('\u{a9e5}', 'ꧦ'),
+    ('\u{aa29}', '\u{aa2e}'),
+    ('\u{aa31}', '\u{aa32}'),
+    ('\u{aa35}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', '\u{aa4c}'),
+    ('ꩰ', 'ꩰ'),
+    ('\u{aa7c}', '\u{aa7c}'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('ꫝ', 'ꫝ'),
+    ('\u{aaec}', '\u{aaed}'),
+    ('ꫳ', 'ꫴ'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('꭛', 'ꭟ'),
+    ('\u{ab69}', '\u{ab6b}'),
+    ('\u{abe5}', '\u{abe5}'),
+    ('\u{abe8}', '\u{abe8}'),
+    ('\u{abed}', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('﮲', '﯁'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('︓', '︓'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('﹒', '﹒'),
+    ('﹕', '﹕'),
+    ('\u{feff}', '\u{feff}'),
+    (''', '''),
+    ('.', '.'),
+    (':', ':'),
+    ('^', '^'),
+    ('`', '`'),
+    ('ー', 'ー'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('￣', '￣'),
+    ('\u{fff9}', '\u{fffb}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('\u{11001}', '\u{11001}'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '\u{11081}'),
+    ('\u{110b3}', '\u{110b6}'),
+    ('\u{110b9}', '\u{110ba}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{1112b}'),
+    ('\u{1112d}', '\u{11134}'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '\u{11181}'),
+    ('\u{111b6}', '\u{111be}'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111cf}', '\u{111cf}'),
+    ('\u{1122f}', '\u{11231}'),
+    ('\u{11234}', '\u{11234}'),
+    ('\u{11236}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112df}'),
+    ('\u{112e3}', '\u{112ea}'),
+    ('\u{11300}', '\u{11301}'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{11340}', '\u{11340}'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('\u{11438}', '\u{1143f}'),
+    ('\u{11442}', '\u{11444}'),
+    ('\u{11446}', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b3}', '\u{114b8}'),
+    ('\u{114ba}', '\u{114ba}'),
+    ('\u{114bf}', '\u{114c0}'),
+    ('\u{114c2}', '\u{114c3}'),
+    ('\u{115b2}', '\u{115b5}'),
+    ('\u{115bc}', '\u{115bd}'),
+    ('\u{115bf}', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('\u{11633}', '\u{1163a}'),
+    ('\u{1163d}', '\u{1163d}'),
+    ('\u{1163f}', '\u{11640}'),
+    ('\u{116ab}', '\u{116ab}'),
+    ('\u{116ad}', '\u{116ad}'),
+    ('\u{116b0}', '\u{116b5}'),
+    ('\u{116b7}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1171f}'),
+    ('\u{11722}', '\u{11725}'),
+    ('\u{11727}', '\u{1172b}'),
+    ('\u{1182f}', '\u{11837}'),
+    ('\u{11839}', '\u{1183a}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{1193e}', '\u{1193e}'),
+    ('\u{11943}', '\u{11943}'),
+    ('\u{119d4}', '\u{119d7}'),
+    ('\u{119da}', '\u{119db}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '\u{11a38}'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a56}'),
+    ('\u{11a59}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a96}'),
+    ('\u{11a98}', '\u{11a99}'),
+    ('\u{11c30}', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3d}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('\u{11caa}', '\u{11cb0}'),
+    ('\u{11cb2}', '\u{11cb3}'),
+    ('\u{11cb5}', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('\u{11d95}', '\u{11d95}'),
+    ('\u{11d97}', '\u{11d97}'),
+    ('\u{11ef3}', '\u{11ef4}'),
+    ('\u{13430}', '\u{13438}'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('𖭀', '𖭃'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('\u{16f8f}', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '\u{16fe4}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d173}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '𞄽'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '𞥋'),
+    ('🏻', '🏿'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const CASED: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ƺ'),
+    ('Ƽ', 'ƿ'),
+    ('DŽ', 'ʓ'),
+    ('ʕ', 'ʸ'),
+    ('ˀ', 'ˁ'),
+    ('ˠ', 'ˤ'),
+    ('\u{345}', '\u{345}'),
+    ('Ͱ', 'ͳ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՠ', 'ֈ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℴ'),
+    ('ℹ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ⅿ'),
+    ('Ↄ', 'ↄ'),
+    ('Ⓐ', 'ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('Ꙁ', 'ꙭ'),
+    ('Ꚁ', 'ꚝ'),
+    ('Ꜣ', 'ꞇ'),
+    ('Ꞌ', 'ꞎ'),
+    ('Ꞑ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', '\u{a7f6}'),
+    ('ꟸ', 'ꟺ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab68}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('𐐀', '𐑏'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𑢠', '𑣟'),
+    ('𖹀', '𖹿'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞤀', '𞥃'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+];
+
+pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('µ', 'µ'),
+    ('À', 'Ö'),
+    ('Ø', 'ß'),
+    ('Ā', 'Ā'),
+    ('Ă', 'Ă'),
+    ('Ą', 'Ą'),
+    ('Ć', 'Ć'),
+    ('Ĉ', 'Ĉ'),
+    ('Ċ', 'Ċ'),
+    ('Č', 'Č'),
+    ('Ď', 'Ď'),
+    ('Đ', 'Đ'),
+    ('Ē', 'Ē'),
+    ('Ĕ', 'Ĕ'),
+    ('Ė', 'Ė'),
+    ('Ę', 'Ę'),
+    ('Ě', 'Ě'),
+    ('Ĝ', 'Ĝ'),
+    ('Ğ', 'Ğ'),
+    ('Ġ', 'Ġ'),
+    ('Ģ', 'Ģ'),
+    ('Ĥ', 'Ĥ'),
+    ('Ħ', 'Ħ'),
+    ('Ĩ', 'Ĩ'),
+    ('Ī', 'Ī'),
+    ('Ĭ', 'Ĭ'),
+    ('Į', 'Į'),
+    ('İ', 'İ'),
+    ('IJ', 'IJ'),
+    ('Ĵ', 'Ĵ'),
+    ('Ķ', 'Ķ'),
+    ('Ĺ', 'Ĺ'),
+    ('Ļ', 'Ļ'),
+    ('Ľ', 'Ľ'),
+    ('Ŀ', 'Ŀ'),
+    ('Ł', 'Ł'),
+    ('Ń', 'Ń'),
+    ('Ņ', 'Ņ'),
+    ('Ň', 'Ň'),
+    ('ʼn', 'Ŋ'),
+    ('Ō', 'Ō'),
+    ('Ŏ', 'Ŏ'),
+    ('Ő', 'Ő'),
+    ('Œ', 'Œ'),
+    ('Ŕ', 'Ŕ'),
+    ('Ŗ', 'Ŗ'),
+    ('Ř', 'Ř'),
+    ('Ś', 'Ś'),
+    ('Ŝ', 'Ŝ'),
+    ('Ş', 'Ş'),
+    ('Š', 'Š'),
+    ('Ţ', 'Ţ'),
+    ('Ť', 'Ť'),
+    ('Ŧ', 'Ŧ'),
+    ('Ũ', 'Ũ'),
+    ('Ū', 'Ū'),
+    ('Ŭ', 'Ŭ'),
+    ('Ů', 'Ů'),
+    ('Ű', 'Ű'),
+    ('Ų', 'Ų'),
+    ('Ŵ', 'Ŵ'),
+    ('Ŷ', 'Ŷ'),
+    ('Ÿ', 'Ź'),
+    ('Ż', 'Ż'),
+    ('Ž', 'Ž'),
+    ('ſ', 'ſ'),
+    ('Ɓ', 'Ƃ'),
+    ('Ƅ', 'Ƅ'),
+    ('Ɔ', 'Ƈ'),
+    ('Ɖ', 'Ƌ'),
+    ('Ǝ', 'Ƒ'),
+    ('Ɠ', 'Ɣ'),
+    ('Ɩ', 'Ƙ'),
+    ('Ɯ', 'Ɲ'),
+    ('Ɵ', 'Ơ'),
+    ('Ƣ', 'Ƣ'),
+    ('Ƥ', 'Ƥ'),
+    ('Ʀ', 'Ƨ'),
+    ('Ʃ', 'Ʃ'),
+    ('Ƭ', 'Ƭ'),
+    ('Ʈ', 'Ư'),
+    ('Ʊ', 'Ƴ'),
+    ('Ƶ', 'Ƶ'),
+    ('Ʒ', 'Ƹ'),
+    ('Ƽ', 'Ƽ'),
+    ('DŽ', 'Dž'),
+    ('LJ', 'Lj'),
+    ('NJ', 'Nj'),
+    ('Ǎ', 'Ǎ'),
+    ('Ǐ', 'Ǐ'),
+    ('Ǒ', 'Ǒ'),
+    ('Ǔ', 'Ǔ'),
+    ('Ǖ', 'Ǖ'),
+    ('Ǘ', 'Ǘ'),
+    ('Ǚ', 'Ǚ'),
+    ('Ǜ', 'Ǜ'),
+    ('Ǟ', 'Ǟ'),
+    ('Ǡ', 'Ǡ'),
+    ('Ǣ', 'Ǣ'),
+    ('Ǥ', 'Ǥ'),
+    ('Ǧ', 'Ǧ'),
+    ('Ǩ', 'Ǩ'),
+    ('Ǫ', 'Ǫ'),
+    ('Ǭ', 'Ǭ'),
+    ('Ǯ', 'Ǯ'),
+    ('DZ', 'Dz'),
+    ('Ǵ', 'Ǵ'),
+    ('Ƕ', 'Ǹ'),
+    ('Ǻ', 'Ǻ'),
+    ('Ǽ', 'Ǽ'),
+    ('Ǿ', 'Ǿ'),
+    ('Ȁ', 'Ȁ'),
+    ('Ȃ', 'Ȃ'),
+    ('Ȅ', 'Ȅ'),
+    ('Ȇ', 'Ȇ'),
+    ('Ȉ', 'Ȉ'),
+    ('Ȋ', 'Ȋ'),
+    ('Ȍ', 'Ȍ'),
+    ('Ȏ', 'Ȏ'),
+    ('Ȑ', 'Ȑ'),
+    ('Ȓ', 'Ȓ'),
+    ('Ȕ', 'Ȕ'),
+    ('Ȗ', 'Ȗ'),
+    ('Ș', 'Ș'),
+    ('Ț', 'Ț'),
+    ('Ȝ', 'Ȝ'),
+    ('Ȟ', 'Ȟ'),
+    ('Ƞ', 'Ƞ'),
+    ('Ȣ', 'Ȣ'),
+    ('Ȥ', 'Ȥ'),
+    ('Ȧ', 'Ȧ'),
+    ('Ȩ', 'Ȩ'),
+    ('Ȫ', 'Ȫ'),
+    ('Ȭ', 'Ȭ'),
+    ('Ȯ', 'Ȯ'),
+    ('Ȱ', 'Ȱ'),
+    ('Ȳ', 'Ȳ'),
+    ('Ⱥ', 'Ȼ'),
+    ('Ƚ', 'Ⱦ'),
+    ('Ɂ', 'Ɂ'),
+    ('Ƀ', 'Ɇ'),
+    ('Ɉ', 'Ɉ'),
+    ('Ɋ', 'Ɋ'),
+    ('Ɍ', 'Ɍ'),
+    ('Ɏ', 'Ɏ'),
+    ('\u{345}', '\u{345}'),
+    ('Ͱ', 'Ͱ'),
+    ('Ͳ', 'Ͳ'),
+    ('Ͷ', 'Ͷ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ώ'),
+    ('Α', 'Ρ'),
+    ('Σ', 'Ϋ'),
+    ('ς', 'ς'),
+    ('Ϗ', 'ϑ'),
+    ('ϕ', 'ϖ'),
+    ('Ϙ', 'Ϙ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'Ϣ'),
+    ('Ϥ', 'Ϥ'),
+    ('Ϧ', 'Ϧ'),
+    ('Ϩ', 'Ϩ'),
+    ('Ϫ', 'Ϫ'),
+    ('Ϭ', 'Ϭ'),
+    ('Ϯ', 'Ϯ'),
+    ('ϰ', 'ϱ'),
+    ('ϴ', 'ϵ'),
+    ('Ϸ', 'Ϸ'),
+    ('Ϲ', 'Ϻ'),
+    ('Ͻ', 'Я'),
+    ('Ѡ', 'Ѡ'),
+    ('Ѣ', 'Ѣ'),
+    ('Ѥ', 'Ѥ'),
+    ('Ѧ', 'Ѧ'),
+    ('Ѩ', 'Ѩ'),
+    ('Ѫ', 'Ѫ'),
+    ('Ѭ', 'Ѭ'),
+    ('Ѯ', 'Ѯ'),
+    ('Ѱ', 'Ѱ'),
+    ('Ѳ', 'Ѳ'),
+    ('Ѵ', 'Ѵ'),
+    ('Ѷ', 'Ѷ'),
+    ('Ѹ', 'Ѹ'),
+    ('Ѻ', 'Ѻ'),
+    ('Ѽ', 'Ѽ'),
+    ('Ѿ', 'Ѿ'),
+    ('Ҁ', 'Ҁ'),
+    ('Ҋ', 'Ҋ'),
+    ('Ҍ', 'Ҍ'),
+    ('Ҏ', 'Ҏ'),
+    ('Ґ', 'Ґ'),
+    ('Ғ', 'Ғ'),
+    ('Ҕ', 'Ҕ'),
+    ('Җ', 'Җ'),
+    ('Ҙ', 'Ҙ'),
+    ('Қ', 'Қ'),
+    ('Ҝ', 'Ҝ'),
+    ('Ҟ', 'Ҟ'),
+    ('Ҡ', 'Ҡ'),
+    ('Ң', 'Ң'),
+    ('Ҥ', 'Ҥ'),
+    ('Ҧ', 'Ҧ'),
+    ('Ҩ', 'Ҩ'),
+    ('Ҫ', 'Ҫ'),
+    ('Ҭ', 'Ҭ'),
+    ('Ү', 'Ү'),
+    ('Ұ', 'Ұ'),
+    ('Ҳ', 'Ҳ'),
+    ('Ҵ', 'Ҵ'),
+    ('Ҷ', 'Ҷ'),
+    ('Ҹ', 'Ҹ'),
+    ('Һ', 'Һ'),
+    ('Ҽ', 'Ҽ'),
+    ('Ҿ', 'Ҿ'),
+    ('Ӏ', 'Ӂ'),
+    ('Ӄ', 'Ӄ'),
+    ('Ӆ', 'Ӆ'),
+    ('Ӈ', 'Ӈ'),
+    ('Ӊ', 'Ӊ'),
+    ('Ӌ', 'Ӌ'),
+    ('Ӎ', 'Ӎ'),
+    ('Ӑ', 'Ӑ'),
+    ('Ӓ', 'Ӓ'),
+    ('Ӕ', 'Ӕ'),
+    ('Ӗ', 'Ӗ'),
+    ('Ә', 'Ә'),
+    ('Ӛ', 'Ӛ'),
+    ('Ӝ', 'Ӝ'),
+    ('Ӟ', 'Ӟ'),
+    ('Ӡ', 'Ӡ'),
+    ('Ӣ', 'Ӣ'),
+    ('Ӥ', 'Ӥ'),
+    ('Ӧ', 'Ӧ'),
+    ('Ө', 'Ө'),
+    ('Ӫ', 'Ӫ'),
+    ('Ӭ', 'Ӭ'),
+    ('Ӯ', 'Ӯ'),
+    ('Ӱ', 'Ӱ'),
+    ('Ӳ', 'Ӳ'),
+    ('Ӵ', 'Ӵ'),
+    ('Ӷ', 'Ӷ'),
+    ('Ӹ', 'Ӹ'),
+    ('Ӻ', 'Ӻ'),
+    ('Ӽ', 'Ӽ'),
+    ('Ӿ', 'Ӿ'),
+    ('Ԁ', 'Ԁ'),
+    ('Ԃ', 'Ԃ'),
+    ('Ԅ', 'Ԅ'),
+    ('Ԇ', 'Ԇ'),
+    ('Ԉ', 'Ԉ'),
+    ('Ԋ', 'Ԋ'),
+    ('Ԍ', 'Ԍ'),
+    ('Ԏ', 'Ԏ'),
+    ('Ԑ', 'Ԑ'),
+    ('Ԓ', 'Ԓ'),
+    ('Ԕ', 'Ԕ'),
+    ('Ԗ', 'Ԗ'),
+    ('Ԙ', 'Ԙ'),
+    ('Ԛ', 'Ԛ'),
+    ('Ԝ', 'Ԝ'),
+    ('Ԟ', 'Ԟ'),
+    ('Ԡ', 'Ԡ'),
+    ('Ԣ', 'Ԣ'),
+    ('Ԥ', 'Ԥ'),
+    ('Ԧ', 'Ԧ'),
+    ('Ԩ', 'Ԩ'),
+    ('Ԫ', 'Ԫ'),
+    ('Ԭ', 'Ԭ'),
+    ('Ԯ', 'Ԯ'),
+    ('Ա', 'Ֆ'),
+    ('և', 'և'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('Ḁ', 'Ḁ'),
+    ('Ḃ', 'Ḃ'),
+    ('Ḅ', 'Ḅ'),
+    ('Ḇ', 'Ḇ'),
+    ('Ḉ', 'Ḉ'),
+    ('Ḋ', 'Ḋ'),
+    ('Ḍ', 'Ḍ'),
+    ('Ḏ', 'Ḏ'),
+    ('Ḑ', 'Ḑ'),
+    ('Ḓ', 'Ḓ'),
+    ('Ḕ', 'Ḕ'),
+    ('Ḗ', 'Ḗ'),
+    ('Ḙ', 'Ḙ'),
+    ('Ḛ', 'Ḛ'),
+    ('Ḝ', 'Ḝ'),
+    ('Ḟ', 'Ḟ'),
+    ('Ḡ', 'Ḡ'),
+    ('Ḣ', 'Ḣ'),
+    ('Ḥ', 'Ḥ'),
+    ('Ḧ', 'Ḧ'),
+    ('Ḩ', 'Ḩ'),
+    ('Ḫ', 'Ḫ'),
+    ('Ḭ', 'Ḭ'),
+    ('Ḯ', 'Ḯ'),
+    ('Ḱ', 'Ḱ'),
+    ('Ḳ', 'Ḳ'),
+    ('Ḵ', 'Ḵ'),
+    ('Ḷ', 'Ḷ'),
+    ('Ḹ', 'Ḹ'),
+    ('Ḻ', 'Ḻ'),
+    ('Ḽ', 'Ḽ'),
+    ('Ḿ', 'Ḿ'),
+    ('Ṁ', 'Ṁ'),
+    ('Ṃ', 'Ṃ'),
+    ('Ṅ', 'Ṅ'),
+    ('Ṇ', 'Ṇ'),
+    ('Ṉ', 'Ṉ'),
+    ('Ṋ', 'Ṋ'),
+    ('Ṍ', 'Ṍ'),
+    ('Ṏ', 'Ṏ'),
+    ('Ṑ', 'Ṑ'),
+    ('Ṓ', 'Ṓ'),
+    ('Ṕ', 'Ṕ'),
+    ('Ṗ', 'Ṗ'),
+    ('Ṙ', 'Ṙ'),
+    ('Ṛ', 'Ṛ'),
+    ('Ṝ', 'Ṝ'),
+    ('Ṟ', 'Ṟ'),
+    ('Ṡ', 'Ṡ'),
+    ('Ṣ', 'Ṣ'),
+    ('Ṥ', 'Ṥ'),
+    ('Ṧ', 'Ṧ'),
+    ('Ṩ', 'Ṩ'),
+    ('Ṫ', 'Ṫ'),
+    ('Ṭ', 'Ṭ'),
+    ('Ṯ', 'Ṯ'),
+    ('Ṱ', 'Ṱ'),
+    ('Ṳ', 'Ṳ'),
+    ('Ṵ', 'Ṵ'),
+    ('Ṷ', 'Ṷ'),
+    ('Ṹ', 'Ṹ'),
+    ('Ṻ', 'Ṻ'),
+    ('Ṽ', 'Ṽ'),
+    ('Ṿ', 'Ṿ'),
+    ('Ẁ', 'Ẁ'),
+    ('Ẃ', 'Ẃ'),
+    ('Ẅ', 'Ẅ'),
+    ('Ẇ', 'Ẇ'),
+    ('Ẉ', 'Ẉ'),
+    ('Ẋ', 'Ẋ'),
+    ('Ẍ', 'Ẍ'),
+    ('Ẏ', 'Ẏ'),
+    ('Ẑ', 'Ẑ'),
+    ('Ẓ', 'Ẓ'),
+    ('Ẕ', 'Ẕ'),
+    ('ẚ', 'ẛ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'Ạ'),
+    ('Ả', 'Ả'),
+    ('Ấ', 'Ấ'),
+    ('Ầ', 'Ầ'),
+    ('Ẩ', 'Ẩ'),
+    ('Ẫ', 'Ẫ'),
+    ('Ậ', 'Ậ'),
+    ('Ắ', 'Ắ'),
+    ('Ằ', 'Ằ'),
+    ('Ẳ', 'Ẳ'),
+    ('Ẵ', 'Ẵ'),
+    ('Ặ', 'Ặ'),
+    ('Ẹ', 'Ẹ'),
+    ('Ẻ', 'Ẻ'),
+    ('Ẽ', 'Ẽ'),
+    ('Ế', 'Ế'),
+    ('Ề', 'Ề'),
+    ('Ể', 'Ể'),
+    ('Ễ', 'Ễ'),
+    ('Ệ', 'Ệ'),
+    ('Ỉ', 'Ỉ'),
+    ('Ị', 'Ị'),
+    ('Ọ', 'Ọ'),
+    ('Ỏ', 'Ỏ'),
+    ('Ố', 'Ố'),
+    ('Ồ', 'Ồ'),
+    ('Ổ', 'Ổ'),
+    ('Ỗ', 'Ỗ'),
+    ('Ộ', 'Ộ'),
+    ('Ớ', 'Ớ'),
+    ('Ờ', 'Ờ'),
+    ('Ở', 'Ở'),
+    ('Ỡ', 'Ỡ'),
+    ('Ợ', 'Ợ'),
+    ('Ụ', 'Ụ'),
+    ('Ủ', 'Ủ'),
+    ('Ứ', 'Ứ'),
+    ('Ừ', 'Ừ'),
+    ('Ử', 'Ử'),
+    ('Ữ', 'Ữ'),
+    ('Ự', 'Ự'),
+    ('Ỳ', 'Ỳ'),
+    ('Ỵ', 'Ỵ'),
+    ('Ỷ', 'Ỷ'),
+    ('Ỹ', 'Ỹ'),
+    ('Ỻ', 'Ỻ'),
+    ('Ỽ', 'Ỽ'),
+    ('Ỿ', 'Ỿ'),
+    ('Ἀ', 'Ἇ'),
+    ('Ἐ', 'Ἕ'),
+    ('Ἠ', 'Ἧ'),
+    ('Ἰ', 'Ἷ'),
+    ('Ὀ', 'Ὅ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'Ὗ'),
+    ('Ὠ', 'Ὧ'),
+    ('ᾀ', 'ᾯ'),
+    ('ᾲ', 'ᾴ'),
+    ('ᾷ', 'ᾼ'),
+    ('ῂ', 'ῄ'),
+    ('ῇ', 'ῌ'),
+    ('Ῐ', 'Ί'),
+    ('Ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῷ', 'ῼ'),
+    ('Ω', 'Ω'),
+    ('K', 'Å'),
+    ('Ⅎ', 'Ⅎ'),
+    ('Ⅰ', 'Ⅿ'),
+    ('Ↄ', 'Ↄ'),
+    ('Ⓐ', 'Ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('Ⱡ', 'Ⱡ'),
+    ('Ɫ', 'Ɽ'),
+    ('Ⱨ', 'Ⱨ'),
+    ('Ⱪ', 'Ⱪ'),
+    ('Ⱬ', 'Ⱬ'),
+    ('Ɑ', 'Ɒ'),
+    ('Ⱳ', 'Ⱳ'),
+    ('Ⱶ', 'Ⱶ'),
+    ('Ȿ', 'Ⲁ'),
+    ('Ⲃ', 'Ⲃ'),
+    ('Ⲅ', 'Ⲅ'),
+    ('Ⲇ', 'Ⲇ'),
+    ('Ⲉ', 'Ⲉ'),
+    ('Ⲋ', 'Ⲋ'),
+    ('Ⲍ', 'Ⲍ'),
+    ('Ⲏ', 'Ⲏ'),
+    ('Ⲑ', 'Ⲑ'),
+    ('Ⲓ', 'Ⲓ'),
+    ('Ⲕ', 'Ⲕ'),
+    ('Ⲗ', 'Ⲗ'),
+    ('Ⲙ', 'Ⲙ'),
+    ('Ⲛ', 'Ⲛ'),
+    ('Ⲝ', 'Ⲝ'),
+    ('Ⲟ', 'Ⲟ'),
+    ('Ⲡ', 'Ⲡ'),
+    ('Ⲣ', 'Ⲣ'),
+    ('Ⲥ', 'Ⲥ'),
+    ('Ⲧ', 'Ⲧ'),
+    ('Ⲩ', 'Ⲩ'),
+    ('Ⲫ', 'Ⲫ'),
+    ('Ⲭ', 'Ⲭ'),
+    ('Ⲯ', 'Ⲯ'),
+    ('Ⲱ', 'Ⲱ'),
+    ('Ⲳ', 'Ⲳ'),
+    ('Ⲵ', 'Ⲵ'),
+    ('Ⲷ', 'Ⲷ'),
+    ('Ⲹ', 'Ⲹ'),
+    ('Ⲻ', 'Ⲻ'),
+    ('Ⲽ', 'Ⲽ'),
+    ('Ⲿ', 'Ⲿ'),
+    ('Ⳁ', 'Ⳁ'),
+    ('Ⳃ', 'Ⳃ'),
+    ('Ⳅ', 'Ⳅ'),
+    ('Ⳇ', 'Ⳇ'),
+    ('Ⳉ', 'Ⳉ'),
+    ('Ⳋ', 'Ⳋ'),
+    ('Ⳍ', 'Ⳍ'),
+    ('Ⳏ', 'Ⳏ'),
+    ('Ⳑ', 'Ⳑ'),
+    ('Ⳓ', 'Ⳓ'),
+    ('Ⳕ', 'Ⳕ'),
+    ('Ⳗ', 'Ⳗ'),
+    ('Ⳙ', 'Ⳙ'),
+    ('Ⳛ', 'Ⳛ'),
+    ('Ⳝ', 'Ⳝ'),
+    ('Ⳟ', 'Ⳟ'),
+    ('Ⳡ', 'Ⳡ'),
+    ('Ⳣ', 'Ⳣ'),
+    ('Ⳬ', 'Ⳬ'),
+    ('Ⳮ', 'Ⳮ'),
+    ('Ⳳ', 'Ⳳ'),
+    ('Ꙁ', 'Ꙁ'),
+    ('Ꙃ', 'Ꙃ'),
+    ('Ꙅ', 'Ꙅ'),
+    ('Ꙇ', 'Ꙇ'),
+    ('Ꙉ', 'Ꙉ'),
+    ('Ꙋ', 'Ꙋ'),
+    ('Ꙍ', 'Ꙍ'),
+    ('Ꙏ', 'Ꙏ'),
+    ('Ꙑ', 'Ꙑ'),
+    ('Ꙓ', 'Ꙓ'),
+    ('Ꙕ', 'Ꙕ'),
+    ('Ꙗ', 'Ꙗ'),
+    ('Ꙙ', 'Ꙙ'),
+    ('Ꙛ', 'Ꙛ'),
+    ('Ꙝ', 'Ꙝ'),
+    ('Ꙟ', 'Ꙟ'),
+    ('Ꙡ', 'Ꙡ'),
+    ('Ꙣ', 'Ꙣ'),
+    ('Ꙥ', 'Ꙥ'),
+    ('Ꙧ', 'Ꙧ'),
+    ('Ꙩ', 'Ꙩ'),
+    ('Ꙫ', 'Ꙫ'),
+    ('Ꙭ', 'Ꙭ'),
+    ('Ꚁ', 'Ꚁ'),
+    ('Ꚃ', 'Ꚃ'),
+    ('Ꚅ', 'Ꚅ'),
+    ('Ꚇ', 'Ꚇ'),
+    ('Ꚉ', 'Ꚉ'),
+    ('Ꚋ', 'Ꚋ'),
+    ('Ꚍ', 'Ꚍ'),
+    ('Ꚏ', 'Ꚏ'),
+    ('Ꚑ', 'Ꚑ'),
+    ('Ꚓ', 'Ꚓ'),
+    ('Ꚕ', 'Ꚕ'),
+    ('Ꚗ', 'Ꚗ'),
+    ('Ꚙ', 'Ꚙ'),
+    ('Ꚛ', 'Ꚛ'),
+    ('Ꜣ', 'Ꜣ'),
+    ('Ꜥ', 'Ꜥ'),
+    ('Ꜧ', 'Ꜧ'),
+    ('Ꜩ', 'Ꜩ'),
+    ('Ꜫ', 'Ꜫ'),
+    ('Ꜭ', 'Ꜭ'),
+    ('Ꜯ', 'Ꜯ'),
+    ('Ꜳ', 'Ꜳ'),
+    ('Ꜵ', 'Ꜵ'),
+    ('Ꜷ', 'Ꜷ'),
+    ('Ꜹ', 'Ꜹ'),
+    ('Ꜻ', 'Ꜻ'),
+    ('Ꜽ', 'Ꜽ'),
+    ('Ꜿ', 'Ꜿ'),
+    ('Ꝁ', 'Ꝁ'),
+    ('Ꝃ', 'Ꝃ'),
+    ('Ꝅ', 'Ꝅ'),
+    ('Ꝇ', 'Ꝇ'),
+    ('Ꝉ', 'Ꝉ'),
+    ('Ꝋ', 'Ꝋ'),
+    ('Ꝍ', 'Ꝍ'),
+    ('Ꝏ', 'Ꝏ'),
+    ('Ꝑ', 'Ꝑ'),
+    ('Ꝓ', 'Ꝓ'),
+    ('Ꝕ', 'Ꝕ'),
+    ('Ꝗ', 'Ꝗ'),
+    ('Ꝙ', 'Ꝙ'),
+    ('Ꝛ', 'Ꝛ'),
+    ('Ꝝ', 'Ꝝ'),
+    ('Ꝟ', 'Ꝟ'),
+    ('Ꝡ', 'Ꝡ'),
+    ('Ꝣ', 'Ꝣ'),
+    ('Ꝥ', 'Ꝥ'),
+    ('Ꝧ', 'Ꝧ'),
+    ('Ꝩ', 'Ꝩ'),
+    ('Ꝫ', 'Ꝫ'),
+    ('Ꝭ', 'Ꝭ'),
+    ('Ꝯ', 'Ꝯ'),
+    ('Ꝺ', 'Ꝺ'),
+    ('Ꝼ', 'Ꝼ'),
+    ('Ᵹ', 'Ꝿ'),
+    ('Ꞁ', 'Ꞁ'),
+    ('Ꞃ', 'Ꞃ'),
+    ('Ꞅ', 'Ꞅ'),
+    ('Ꞇ', 'Ꞇ'),
+    ('Ꞌ', 'Ꞌ'),
+    ('Ɥ', 'Ɥ'),
+    ('Ꞑ', 'Ꞑ'),
+    ('Ꞓ', 'Ꞓ'),
+    ('Ꞗ', 'Ꞗ'),
+    ('Ꞙ', 'Ꞙ'),
+    ('Ꞛ', 'Ꞛ'),
+    ('Ꞝ', 'Ꞝ'),
+    ('Ꞟ', 'Ꞟ'),
+    ('Ꞡ', 'Ꞡ'),
+    ('Ꞣ', 'Ꞣ'),
+    ('Ꞥ', 'Ꞥ'),
+    ('Ꞧ', 'Ꞧ'),
+    ('Ꞩ', 'Ꞩ'),
+    ('Ɦ', 'Ɪ'),
+    ('Ʞ', 'Ꞵ'),
+    ('Ꞷ', 'Ꞷ'),
+    ('Ꞹ', 'Ꞹ'),
+    ('Ꞻ', 'Ꞻ'),
+    ('Ꞽ', 'Ꞽ'),
+    ('Ꞿ', 'Ꞿ'),
+    ('Ꟃ', 'Ꟃ'),
+    ('Ꞔ', '\u{a7c7}'),
+    ('\u{a7c9}', '\u{a7c9}'),
+    ('\u{a7f5}', '\u{a7f5}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('A', 'Z'),
+    ('𐐀', '𐐧'),
+    ('𐒰', '𐓓'),
+    ('𐲀', '𐲲'),
+    ('𑢠', '𑢿'),
+    ('𖹀', '𖹟'),
+    ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('µ', 'µ'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ķ'),
+    ('Ĺ', 'ƌ'),
+    ('Ǝ', 'ƚ'),
+    ('Ɯ', 'Ʃ'),
+    ('Ƭ', 'ƹ'),
+    ('Ƽ', 'ƽ'),
+    ('ƿ', 'ƿ'),
+    ('DŽ', 'Ƞ'),
+    ('Ȣ', 'ȳ'),
+    ('Ⱥ', 'ɔ'),
+    ('ɖ', 'ɗ'),
+    ('ə', 'ə'),
+    ('ɛ', 'ɜ'),
+    ('ɠ', 'ɡ'),
+    ('ɣ', 'ɣ'),
+    ('ɥ', 'ɦ'),
+    ('ɨ', 'ɬ'),
+    ('ɯ', 'ɯ'),
+    ('ɱ', 'ɲ'),
+    ('ɵ', 'ɵ'),
+    ('ɽ', 'ɽ'),
+    ('ʀ', 'ʀ'),
+    ('ʂ', 'ʃ'),
+    ('ʇ', 'ʌ'),
+    ('ʒ', 'ʒ'),
+    ('ʝ', 'ʞ'),
+    ('\u{345}', '\u{345}'),
+    ('Ͱ', 'ͳ'),
+    ('Ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϑ'),
+    ('ϕ', 'ϵ'),
+    ('Ϸ', 'ϻ'),
+    ('Ͻ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ա', 'և'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᵹ', 'ᵹ'),
+    ('ᵽ', 'ᵽ'),
+    ('ᶎ', 'ᶎ'),
+    ('Ḁ', 'ẛ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('Ω', 'Ω'),
+    ('K', 'Å'),
+    ('Ⅎ', 'Ⅎ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ⅿ'),
+    ('Ↄ', 'ↄ'),
+    ('Ⓐ', 'ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'Ɒ'),
+    ('Ⱳ', 'ⱳ'),
+    ('Ⱶ', 'ⱶ'),
+    ('Ȿ', 'ⳣ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('Ꙁ', 'ꙭ'),
+    ('Ꚁ', 'ꚛ'),
+    ('Ꜣ', 'ꜯ'),
+    ('Ꜳ', 'ꝯ'),
+    ('Ꝺ', 'ꞇ'),
+    ('Ꞌ', 'Ɥ'),
+    ('Ꞑ', 'ꞔ'),
+    ('Ꞗ', 'Ɪ'),
+    ('Ʞ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', '\u{a7f6}'),
+    ('ꭓ', 'ꭓ'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('𐐀', '𐑏'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𑢠', '𑣟'),
+    ('𖹀', '𖹿'),
+    ('𞤀', '𞥃'),
+];
+
+pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('À', 'Ö'),
+    ('Ø', 'Þ'),
+    ('Ā', 'Ā'),
+    ('Ă', 'Ă'),
+    ('Ą', 'Ą'),
+    ('Ć', 'Ć'),
+    ('Ĉ', 'Ĉ'),
+    ('Ċ', 'Ċ'),
+    ('Č', 'Č'),
+    ('Ď', 'Ď'),
+    ('Đ', 'Đ'),
+    ('Ē', 'Ē'),
+    ('Ĕ', 'Ĕ'),
+    ('Ė', 'Ė'),
+    ('Ę', 'Ę'),
+    ('Ě', 'Ě'),
+    ('Ĝ', 'Ĝ'),
+    ('Ğ', 'Ğ'),
+    ('Ġ', 'Ġ'),
+    ('Ģ', 'Ģ'),
+    ('Ĥ', 'Ĥ'),
+    ('Ħ', 'Ħ'),
+    ('Ĩ', 'Ĩ'),
+    ('Ī', 'Ī'),
+    ('Ĭ', 'Ĭ'),
+    ('Į', 'Į'),
+    ('İ', 'İ'),
+    ('IJ', 'IJ'),
+    ('Ĵ', 'Ĵ'),
+    ('Ķ', 'Ķ'),
+    ('Ĺ', 'Ĺ'),
+    ('Ļ', 'Ļ'),
+    ('Ľ', 'Ľ'),
+    ('Ŀ', 'Ŀ'),
+    ('Ł', 'Ł'),
+    ('Ń', 'Ń'),
+    ('Ņ', 'Ņ'),
+    ('Ň', 'Ň'),
+    ('Ŋ', 'Ŋ'),
+    ('Ō', 'Ō'),
+    ('Ŏ', 'Ŏ'),
+    ('Ő', 'Ő'),
+    ('Œ', 'Œ'),
+    ('Ŕ', 'Ŕ'),
+    ('Ŗ', 'Ŗ'),
+    ('Ř', 'Ř'),
+    ('Ś', 'Ś'),
+    ('Ŝ', 'Ŝ'),
+    ('Ş', 'Ş'),
+    ('Š', 'Š'),
+    ('Ţ', 'Ţ'),
+    ('Ť', 'Ť'),
+    ('Ŧ', 'Ŧ'),
+    ('Ũ', 'Ũ'),
+    ('Ū', 'Ū'),
+    ('Ŭ', 'Ŭ'),
+    ('Ů', 'Ů'),
+    ('Ű', 'Ű'),
+    ('Ų', 'Ų'),
+    ('Ŵ', 'Ŵ'),
+    ('Ŷ', 'Ŷ'),
+    ('Ÿ', 'Ź'),
+    ('Ż', 'Ż'),
+    ('Ž', 'Ž'),
+    ('Ɓ', 'Ƃ'),
+    ('Ƅ', 'Ƅ'),
+    ('Ɔ', 'Ƈ'),
+    ('Ɖ', 'Ƌ'),
+    ('Ǝ', 'Ƒ'),
+    ('Ɠ', 'Ɣ'),
+    ('Ɩ', 'Ƙ'),
+    ('Ɯ', 'Ɲ'),
+    ('Ɵ', 'Ơ'),
+    ('Ƣ', 'Ƣ'),
+    ('Ƥ', 'Ƥ'),
+    ('Ʀ', 'Ƨ'),
+    ('Ʃ', 'Ʃ'),
+    ('Ƭ', 'Ƭ'),
+    ('Ʈ', 'Ư'),
+    ('Ʊ', 'Ƴ'),
+    ('Ƶ', 'Ƶ'),
+    ('Ʒ', 'Ƹ'),
+    ('Ƽ', 'Ƽ'),
+    ('DŽ', 'Dž'),
+    ('LJ', 'Lj'),
+    ('NJ', 'Nj'),
+    ('Ǎ', 'Ǎ'),
+    ('Ǐ', 'Ǐ'),
+    ('Ǒ', 'Ǒ'),
+    ('Ǔ', 'Ǔ'),
+    ('Ǖ', 'Ǖ'),
+    ('Ǘ', 'Ǘ'),
+    ('Ǚ', 'Ǚ'),
+    ('Ǜ', 'Ǜ'),
+    ('Ǟ', 'Ǟ'),
+    ('Ǡ', 'Ǡ'),
+    ('Ǣ', 'Ǣ'),
+    ('Ǥ', 'Ǥ'),
+    ('Ǧ', 'Ǧ'),
+    ('Ǩ', 'Ǩ'),
+    ('Ǫ', 'Ǫ'),
+    ('Ǭ', 'Ǭ'),
+    ('Ǯ', 'Ǯ'),
+    ('DZ', 'Dz'),
+    ('Ǵ', 'Ǵ'),
+    ('Ƕ', 'Ǹ'),
+    ('Ǻ', 'Ǻ'),
+    ('Ǽ', 'Ǽ'),
+    ('Ǿ', 'Ǿ'),
+    ('Ȁ', 'Ȁ'),
+    ('Ȃ', 'Ȃ'),
+    ('Ȅ', 'Ȅ'),
+    ('Ȇ', 'Ȇ'),
+    ('Ȉ', 'Ȉ'),
+    ('Ȋ', 'Ȋ'),
+    ('Ȍ', 'Ȍ'),
+    ('Ȏ', 'Ȏ'),
+    ('Ȑ', 'Ȑ'),
+    ('Ȓ', 'Ȓ'),
+    ('Ȕ', 'Ȕ'),
+    ('Ȗ', 'Ȗ'),
+    ('Ș', 'Ș'),
+    ('Ț', 'Ț'),
+    ('Ȝ', 'Ȝ'),
+    ('Ȟ', 'Ȟ'),
+    ('Ƞ', 'Ƞ'),
+    ('Ȣ', 'Ȣ'),
+    ('Ȥ', 'Ȥ'),
+    ('Ȧ', 'Ȧ'),
+    ('Ȩ', 'Ȩ'),
+    ('Ȫ', 'Ȫ'),
+    ('Ȭ', 'Ȭ'),
+    ('Ȯ', 'Ȯ'),
+    ('Ȱ', 'Ȱ'),
+    ('Ȳ', 'Ȳ'),
+    ('Ⱥ', 'Ȼ'),
+    ('Ƚ', 'Ⱦ'),
+    ('Ɂ', 'Ɂ'),
+    ('Ƀ', 'Ɇ'),
+    ('Ɉ', 'Ɉ'),
+    ('Ɋ', 'Ɋ'),
+    ('Ɍ', 'Ɍ'),
+    ('Ɏ', 'Ɏ'),
+    ('Ͱ', 'Ͱ'),
+    ('Ͳ', 'Ͳ'),
+    ('Ͷ', 'Ͷ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ώ'),
+    ('Α', 'Ρ'),
+    ('Σ', 'Ϋ'),
+    ('Ϗ', 'Ϗ'),
+    ('Ϙ', 'Ϙ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'Ϣ'),
+    ('Ϥ', 'Ϥ'),
+    ('Ϧ', 'Ϧ'),
+    ('Ϩ', 'Ϩ'),
+    ('Ϫ', 'Ϫ'),
+    ('Ϭ', 'Ϭ'),
+    ('Ϯ', 'Ϯ'),
+    ('ϴ', 'ϴ'),
+    ('Ϸ', 'Ϸ'),
+    ('Ϲ', 'Ϻ'),
+    ('Ͻ', 'Я'),
+    ('Ѡ', 'Ѡ'),
+    ('Ѣ', 'Ѣ'),
+    ('Ѥ', 'Ѥ'),
+    ('Ѧ', 'Ѧ'),
+    ('Ѩ', 'Ѩ'),
+    ('Ѫ', 'Ѫ'),
+    ('Ѭ', 'Ѭ'),
+    ('Ѯ', 'Ѯ'),
+    ('Ѱ', 'Ѱ'),
+    ('Ѳ', 'Ѳ'),
+    ('Ѵ', 'Ѵ'),
+    ('Ѷ', 'Ѷ'),
+    ('Ѹ', 'Ѹ'),
+    ('Ѻ', 'Ѻ'),
+    ('Ѽ', 'Ѽ'),
+    ('Ѿ', 'Ѿ'),
+    ('Ҁ', 'Ҁ'),
+    ('Ҋ', 'Ҋ'),
+    ('Ҍ', 'Ҍ'),
+    ('Ҏ', 'Ҏ'),
+    ('Ґ', 'Ґ'),
+    ('Ғ', 'Ғ'),
+    ('Ҕ', 'Ҕ'),
+    ('Җ', 'Җ'),
+    ('Ҙ', 'Ҙ'),
+    ('Қ', 'Қ'),
+    ('Ҝ', 'Ҝ'),
+    ('Ҟ', 'Ҟ'),
+    ('Ҡ', 'Ҡ'),
+    ('Ң', 'Ң'),
+    ('Ҥ', 'Ҥ'),
+    ('Ҧ', 'Ҧ'),
+    ('Ҩ', 'Ҩ'),
+    ('Ҫ', 'Ҫ'),
+    ('Ҭ', 'Ҭ'),
+    ('Ү', 'Ү'),
+    ('Ұ', 'Ұ'),
+    ('Ҳ', 'Ҳ'),
+    ('Ҵ', 'Ҵ'),
+    ('Ҷ', 'Ҷ'),
+    ('Ҹ', 'Ҹ'),
+    ('Һ', 'Һ'),
+    ('Ҽ', 'Ҽ'),
+    ('Ҿ', 'Ҿ'),
+    ('Ӏ', 'Ӂ'),
+    ('Ӄ', 'Ӄ'),
+    ('Ӆ', 'Ӆ'),
+    ('Ӈ', 'Ӈ'),
+    ('Ӊ', 'Ӊ'),
+    ('Ӌ', 'Ӌ'),
+    ('Ӎ', 'Ӎ'),
+    ('Ӑ', 'Ӑ'),
+    ('Ӓ', 'Ӓ'),
+    ('Ӕ', 'Ӕ'),
+    ('Ӗ', 'Ӗ'),
+    ('Ә', 'Ә'),
+    ('Ӛ', 'Ӛ'),
+    ('Ӝ', 'Ӝ'),
+    ('Ӟ', 'Ӟ'),
+    ('Ӡ', 'Ӡ'),
+    ('Ӣ', 'Ӣ'),
+    ('Ӥ', 'Ӥ'),
+    ('Ӧ', 'Ӧ'),
+    ('Ө', 'Ө'),
+    ('Ӫ', 'Ӫ'),
+    ('Ӭ', 'Ӭ'),
+    ('Ӯ', 'Ӯ'),
+    ('Ӱ', 'Ӱ'),
+    ('Ӳ', 'Ӳ'),
+    ('Ӵ', 'Ӵ'),
+    ('Ӷ', 'Ӷ'),
+    ('Ӹ', 'Ӹ'),
+    ('Ӻ', 'Ӻ'),
+    ('Ӽ', 'Ӽ'),
+    ('Ӿ', 'Ӿ'),
+    ('Ԁ', 'Ԁ'),
+    ('Ԃ', 'Ԃ'),
+    ('Ԅ', 'Ԅ'),
+    ('Ԇ', 'Ԇ'),
+    ('Ԉ', 'Ԉ'),
+    ('Ԋ', 'Ԋ'),
+    ('Ԍ', 'Ԍ'),
+    ('Ԏ', 'Ԏ'),
+    ('Ԑ', 'Ԑ'),
+    ('Ԓ', 'Ԓ'),
+    ('Ԕ', 'Ԕ'),
+    ('Ԗ', 'Ԗ'),
+    ('Ԙ', 'Ԙ'),
+    ('Ԛ', 'Ԛ'),
+    ('Ԝ', 'Ԝ'),
+    ('Ԟ', 'Ԟ'),
+    ('Ԡ', 'Ԡ'),
+    ('Ԣ', 'Ԣ'),
+    ('Ԥ', 'Ԥ'),
+    ('Ԧ', 'Ԧ'),
+    ('Ԩ', 'Ԩ'),
+    ('Ԫ', 'Ԫ'),
+    ('Ԭ', 'Ԭ'),
+    ('Ԯ', 'Ԯ'),
+    ('Ա', 'Ֆ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('Ḁ', 'Ḁ'),
+    ('Ḃ', 'Ḃ'),
+    ('Ḅ', 'Ḅ'),
+    ('Ḇ', 'Ḇ'),
+    ('Ḉ', 'Ḉ'),
+    ('Ḋ', 'Ḋ'),
+    ('Ḍ', 'Ḍ'),
+    ('Ḏ', 'Ḏ'),
+    ('Ḑ', 'Ḑ'),
+    ('Ḓ', 'Ḓ'),
+    ('Ḕ', 'Ḕ'),
+    ('Ḗ', 'Ḗ'),
+    ('Ḙ', 'Ḙ'),
+    ('Ḛ', 'Ḛ'),
+    ('Ḝ', 'Ḝ'),
+    ('Ḟ', 'Ḟ'),
+    ('Ḡ', 'Ḡ'),
+    ('Ḣ', 'Ḣ'),
+    ('Ḥ', 'Ḥ'),
+    ('Ḧ', 'Ḧ'),
+    ('Ḩ', 'Ḩ'),
+    ('Ḫ', 'Ḫ'),
+    ('Ḭ', 'Ḭ'),
+    ('Ḯ', 'Ḯ'),
+    ('Ḱ', 'Ḱ'),
+    ('Ḳ', 'Ḳ'),
+    ('Ḵ', 'Ḵ'),
+    ('Ḷ', 'Ḷ'),
+    ('Ḹ', 'Ḹ'),
+    ('Ḻ', 'Ḻ'),
+    ('Ḽ', 'Ḽ'),
+    ('Ḿ', 'Ḿ'),
+    ('Ṁ', 'Ṁ'),
+    ('Ṃ', 'Ṃ'),
+    ('Ṅ', 'Ṅ'),
+    ('Ṇ', 'Ṇ'),
+    ('Ṉ', 'Ṉ'),
+    ('Ṋ', 'Ṋ'),
+    ('Ṍ', 'Ṍ'),
+    ('Ṏ', 'Ṏ'),
+    ('Ṑ', 'Ṑ'),
+    ('Ṓ', 'Ṓ'),
+    ('Ṕ', 'Ṕ'),
+    ('Ṗ', 'Ṗ'),
+    ('Ṙ', 'Ṙ'),
+    ('Ṛ', 'Ṛ'),
+    ('Ṝ', 'Ṝ'),
+    ('Ṟ', 'Ṟ'),
+    ('Ṡ', 'Ṡ'),
+    ('Ṣ', 'Ṣ'),
+    ('Ṥ', 'Ṥ'),
+    ('Ṧ', 'Ṧ'),
+    ('Ṩ', 'Ṩ'),
+    ('Ṫ', 'Ṫ'),
+    ('Ṭ', 'Ṭ'),
+    ('Ṯ', 'Ṯ'),
+    ('Ṱ', 'Ṱ'),
+    ('Ṳ', 'Ṳ'),
+    ('Ṵ', 'Ṵ'),
+    ('Ṷ', 'Ṷ'),
+    ('Ṹ', 'Ṹ'),
+    ('Ṻ', 'Ṻ'),
+    ('Ṽ', 'Ṽ'),
+    ('Ṿ', 'Ṿ'),
+    ('Ẁ', 'Ẁ'),
+    ('Ẃ', 'Ẃ'),
+    ('Ẅ', 'Ẅ'),
+    ('Ẇ', 'Ẇ'),
+    ('Ẉ', 'Ẉ'),
+    ('Ẋ', 'Ẋ'),
+    ('Ẍ', 'Ẍ'),
+    ('Ẏ', 'Ẏ'),
+    ('Ẑ', 'Ẑ'),
+    ('Ẓ', 'Ẓ'),
+    ('Ẕ', 'Ẕ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'Ạ'),
+    ('Ả', 'Ả'),
+    ('Ấ', 'Ấ'),
+    ('Ầ', 'Ầ'),
+    ('Ẩ', 'Ẩ'),
+    ('Ẫ', 'Ẫ'),
+    ('Ậ', 'Ậ'),
+    ('Ắ', 'Ắ'),
+    ('Ằ', 'Ằ'),
+    ('Ẳ', 'Ẳ'),
+    ('Ẵ', 'Ẵ'),
+    ('Ặ', 'Ặ'),
+    ('Ẹ', 'Ẹ'),
+    ('Ẻ', 'Ẻ'),
+    ('Ẽ', 'Ẽ'),
+    ('Ế', 'Ế'),
+    ('Ề', 'Ề'),
+    ('Ể', 'Ể'),
+    ('Ễ', 'Ễ'),
+    ('Ệ', 'Ệ'),
+    ('Ỉ', 'Ỉ'),
+    ('Ị', 'Ị'),
+    ('Ọ', 'Ọ'),
+    ('Ỏ', 'Ỏ'),
+    ('Ố', 'Ố'),
+    ('Ồ', 'Ồ'),
+    ('Ổ', 'Ổ'),
+    ('Ỗ', 'Ỗ'),
+    ('Ộ', 'Ộ'),
+    ('Ớ', 'Ớ'),
+    ('Ờ', 'Ờ'),
+    ('Ở', 'Ở'),
+    ('Ỡ', 'Ỡ'),
+    ('Ợ', 'Ợ'),
+    ('Ụ', 'Ụ'),
+    ('Ủ', 'Ủ'),
+    ('Ứ', 'Ứ'),
+    ('Ừ', 'Ừ'),
+    ('Ử', 'Ử'),
+    ('Ữ', 'Ữ'),
+    ('Ự', 'Ự'),
+    ('Ỳ', 'Ỳ'),
+    ('Ỵ', 'Ỵ'),
+    ('Ỷ', 'Ỷ'),
+    ('Ỹ', 'Ỹ'),
+    ('Ỻ', 'Ỻ'),
+    ('Ỽ', 'Ỽ'),
+    ('Ỿ', 'Ỿ'),
+    ('Ἀ', 'Ἇ'),
+    ('Ἐ', 'Ἕ'),
+    ('Ἠ', 'Ἧ'),
+    ('Ἰ', 'Ἷ'),
+    ('Ὀ', 'Ὅ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'Ὗ'),
+    ('Ὠ', 'Ὧ'),
+    ('ᾈ', 'ᾏ'),
+    ('ᾘ', 'ᾟ'),
+    ('ᾨ', 'ᾯ'),
+    ('Ᾰ', 'ᾼ'),
+    ('Ὲ', 'ῌ'),
+    ('Ῐ', 'Ί'),
+    ('Ῠ', 'Ῥ'),
+    ('Ὸ', 'ῼ'),
+    ('Ω', 'Ω'),
+    ('K', 'Å'),
+    ('Ⅎ', 'Ⅎ'),
+    ('Ⅰ', 'Ⅿ'),
+    ('Ↄ', 'Ↄ'),
+    ('Ⓐ', 'Ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('Ⱡ', 'Ⱡ'),
+    ('Ɫ', 'Ɽ'),
+    ('Ⱨ', 'Ⱨ'),
+    ('Ⱪ', 'Ⱪ'),
+    ('Ⱬ', 'Ⱬ'),
+    ('Ɑ', 'Ɒ'),
+    ('Ⱳ', 'Ⱳ'),
+    ('Ⱶ', 'Ⱶ'),
+    ('Ȿ', 'Ⲁ'),
+    ('Ⲃ', 'Ⲃ'),
+    ('Ⲅ', 'Ⲅ'),
+    ('Ⲇ', 'Ⲇ'),
+    ('Ⲉ', 'Ⲉ'),
+    ('Ⲋ', 'Ⲋ'),
+    ('Ⲍ', 'Ⲍ'),
+    ('Ⲏ', 'Ⲏ'),
+    ('Ⲑ', 'Ⲑ'),
+    ('Ⲓ', 'Ⲓ'),
+    ('Ⲕ', 'Ⲕ'),
+    ('Ⲗ', 'Ⲗ'),
+    ('Ⲙ', 'Ⲙ'),
+    ('Ⲛ', 'Ⲛ'),
+    ('Ⲝ', 'Ⲝ'),
+    ('Ⲟ', 'Ⲟ'),
+    ('Ⲡ', 'Ⲡ'),
+    ('Ⲣ', 'Ⲣ'),
+    ('Ⲥ', 'Ⲥ'),
+    ('Ⲧ', 'Ⲧ'),
+    ('Ⲩ', 'Ⲩ'),
+    ('Ⲫ', 'Ⲫ'),
+    ('Ⲭ', 'Ⲭ'),
+    ('Ⲯ', 'Ⲯ'),
+    ('Ⲱ', 'Ⲱ'),
+    ('Ⲳ', 'Ⲳ'),
+    ('Ⲵ', 'Ⲵ'),
+    ('Ⲷ', 'Ⲷ'),
+    ('Ⲹ', 'Ⲹ'),
+    ('Ⲻ', 'Ⲻ'),
+    ('Ⲽ', 'Ⲽ'),
+    ('Ⲿ', 'Ⲿ'),
+    ('Ⳁ', 'Ⳁ'),
+    ('Ⳃ', 'Ⳃ'),
+    ('Ⳅ', 'Ⳅ'),
+    ('Ⳇ', 'Ⳇ'),
+    ('Ⳉ', 'Ⳉ'),
+    ('Ⳋ', 'Ⳋ'),
+    ('Ⳍ', 'Ⳍ'),
+    ('Ⳏ', 'Ⳏ'),
+    ('Ⳑ', 'Ⳑ'),
+    ('Ⳓ', 'Ⳓ'),
+    ('Ⳕ', 'Ⳕ'),
+    ('Ⳗ', 'Ⳗ'),
+    ('Ⳙ', 'Ⳙ'),
+    ('Ⳛ', 'Ⳛ'),
+    ('Ⳝ', 'Ⳝ'),
+    ('Ⳟ', 'Ⳟ'),
+    ('Ⳡ', 'Ⳡ'),
+    ('Ⳣ', 'Ⳣ'),
+    ('Ⳬ', 'Ⳬ'),
+    ('Ⳮ', 'Ⳮ'),
+    ('Ⳳ', 'Ⳳ'),
+    ('Ꙁ', 'Ꙁ'),
+    ('Ꙃ', 'Ꙃ'),
+    ('Ꙅ', 'Ꙅ'),
+    ('Ꙇ', 'Ꙇ'),
+    ('Ꙉ', 'Ꙉ'),
+    ('Ꙋ', 'Ꙋ'),
+    ('Ꙍ', 'Ꙍ'),
+    ('Ꙏ', 'Ꙏ'),
+    ('Ꙑ', 'Ꙑ'),
+    ('Ꙓ', 'Ꙓ'),
+    ('Ꙕ', 'Ꙕ'),
+    ('Ꙗ', 'Ꙗ'),
+    ('Ꙙ', 'Ꙙ'),
+    ('Ꙛ', 'Ꙛ'),
+    ('Ꙝ', 'Ꙝ'),
+    ('Ꙟ', 'Ꙟ'),
+    ('Ꙡ', 'Ꙡ'),
+    ('Ꙣ', 'Ꙣ'),
+    ('Ꙥ', 'Ꙥ'),
+    ('Ꙧ', 'Ꙧ'),
+    ('Ꙩ', 'Ꙩ'),
+    ('Ꙫ', 'Ꙫ'),
+    ('Ꙭ', 'Ꙭ'),
+    ('Ꚁ', 'Ꚁ'),
+    ('Ꚃ', 'Ꚃ'),
+    ('Ꚅ', 'Ꚅ'),
+    ('Ꚇ', 'Ꚇ'),
+    ('Ꚉ', 'Ꚉ'),
+    ('Ꚋ', 'Ꚋ'),
+    ('Ꚍ', 'Ꚍ'),
+    ('Ꚏ', 'Ꚏ'),
+    ('Ꚑ', 'Ꚑ'),
+    ('Ꚓ', 'Ꚓ'),
+    ('Ꚕ', 'Ꚕ'),
+    ('Ꚗ', 'Ꚗ'),
+    ('Ꚙ', 'Ꚙ'),
+    ('Ꚛ', 'Ꚛ'),
+    ('Ꜣ', 'Ꜣ'),
+    ('Ꜥ', 'Ꜥ'),
+    ('Ꜧ', 'Ꜧ'),
+    ('Ꜩ', 'Ꜩ'),
+    ('Ꜫ', 'Ꜫ'),
+    ('Ꜭ', 'Ꜭ'),
+    ('Ꜯ', 'Ꜯ'),
+    ('Ꜳ', 'Ꜳ'),
+    ('Ꜵ', 'Ꜵ'),
+    ('Ꜷ', 'Ꜷ'),
+    ('Ꜹ', 'Ꜹ'),
+    ('Ꜻ', 'Ꜻ'),
+    ('Ꜽ', 'Ꜽ'),
+    ('Ꜿ', 'Ꜿ'),
+    ('Ꝁ', 'Ꝁ'),
+    ('Ꝃ', 'Ꝃ'),
+    ('Ꝅ', 'Ꝅ'),
+    ('Ꝇ', 'Ꝇ'),
+    ('Ꝉ', 'Ꝉ'),
+    ('Ꝋ', 'Ꝋ'),
+    ('Ꝍ', 'Ꝍ'),
+    ('Ꝏ', 'Ꝏ'),
+    ('Ꝑ', 'Ꝑ'),
+    ('Ꝓ', 'Ꝓ'),
+    ('Ꝕ', 'Ꝕ'),
+    ('Ꝗ', 'Ꝗ'),
+    ('Ꝙ', 'Ꝙ'),
+    ('Ꝛ', 'Ꝛ'),
+    ('Ꝝ', 'Ꝝ'),
+    ('Ꝟ', 'Ꝟ'),
+    ('Ꝡ', 'Ꝡ'),
+    ('Ꝣ', 'Ꝣ'),
+    ('Ꝥ', 'Ꝥ'),
+    ('Ꝧ', 'Ꝧ'),
+    ('Ꝩ', 'Ꝩ'),
+    ('Ꝫ', 'Ꝫ'),
+    ('Ꝭ', 'Ꝭ'),
+    ('Ꝯ', 'Ꝯ'),
+    ('Ꝺ', 'Ꝺ'),
+    ('Ꝼ', 'Ꝼ'),
+    ('Ᵹ', 'Ꝿ'),
+    ('Ꞁ', 'Ꞁ'),
+    ('Ꞃ', 'Ꞃ'),
+    ('Ꞅ', 'Ꞅ'),
+    ('Ꞇ', 'Ꞇ'),
+    ('Ꞌ', 'Ꞌ'),
+    ('Ɥ', 'Ɥ'),
+    ('Ꞑ', 'Ꞑ'),
+    ('Ꞓ', 'Ꞓ'),
+    ('Ꞗ', 'Ꞗ'),
+    ('Ꞙ', 'Ꞙ'),
+    ('Ꞛ', 'Ꞛ'),
+    ('Ꞝ', 'Ꞝ'),
+    ('Ꞟ', 'Ꞟ'),
+    ('Ꞡ', 'Ꞡ'),
+    ('Ꞣ', 'Ꞣ'),
+    ('Ꞥ', 'Ꞥ'),
+    ('Ꞧ', 'Ꞧ'),
+    ('Ꞩ', 'Ꞩ'),
+    ('Ɦ', 'Ɪ'),
+    ('Ʞ', 'Ꞵ'),
+    ('Ꞷ', 'Ꞷ'),
+    ('Ꞹ', 'Ꞹ'),
+    ('Ꞻ', 'Ꞻ'),
+    ('Ꞽ', 'Ꞽ'),
+    ('Ꞿ', 'Ꞿ'),
+    ('Ꟃ', 'Ꟃ'),
+    ('Ꞔ', '\u{a7c7}'),
+    ('\u{a7c9}', '\u{a7c9}'),
+    ('\u{a7f5}', '\u{a7f5}'),
+    ('A', 'Z'),
+    ('𐐀', '𐐧'),
+    ('𐒰', '𐓓'),
+    ('𐲀', '𐲲'),
+    ('𑢠', '𑢿'),
+    ('𖹀', '𖹟'),
+    ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[
+    ('a', 'z'),
+    ('µ', 'µ'),
+    ('ß', 'ö'),
+    ('ø', 'ÿ'),
+    ('ā', 'ā'),
+    ('ă', 'ă'),
+    ('ą', 'ą'),
+    ('ć', 'ć'),
+    ('ĉ', 'ĉ'),
+    ('ċ', 'ċ'),
+    ('č', 'č'),
+    ('ď', 'ď'),
+    ('đ', 'đ'),
+    ('ē', 'ē'),
+    ('ĕ', 'ĕ'),
+    ('ė', 'ė'),
+    ('ę', 'ę'),
+    ('ě', 'ě'),
+    ('ĝ', 'ĝ'),
+    ('ğ', 'ğ'),
+    ('ġ', 'ġ'),
+    ('ģ', 'ģ'),
+    ('ĥ', 'ĥ'),
+    ('ħ', 'ħ'),
+    ('ĩ', 'ĩ'),
+    ('ī', 'ī'),
+    ('ĭ', 'ĭ'),
+    ('į', 'į'),
+    ('ı', 'ı'),
+    ('ij', 'ij'),
+    ('ĵ', 'ĵ'),
+    ('ķ', 'ķ'),
+    ('ĺ', 'ĺ'),
+    ('ļ', 'ļ'),
+    ('ľ', 'ľ'),
+    ('ŀ', 'ŀ'),
+    ('ł', 'ł'),
+    ('ń', 'ń'),
+    ('ņ', 'ņ'),
+    ('ň', 'ʼn'),
+    ('ŋ', 'ŋ'),
+    ('ō', 'ō'),
+    ('ŏ', 'ŏ'),
+    ('ő', 'ő'),
+    ('œ', 'œ'),
+    ('ŕ', 'ŕ'),
+    ('ŗ', 'ŗ'),
+    ('ř', 'ř'),
+    ('ś', 'ś'),
+    ('ŝ', 'ŝ'),
+    ('ş', 'ş'),
+    ('š', 'š'),
+    ('ţ', 'ţ'),
+    ('ť', 'ť'),
+    ('ŧ', 'ŧ'),
+    ('ũ', 'ũ'),
+    ('ū', 'ū'),
+    ('ŭ', 'ŭ'),
+    ('ů', 'ů'),
+    ('ű', 'ű'),
+    ('ų', 'ų'),
+    ('ŵ', 'ŵ'),
+    ('ŷ', 'ŷ'),
+    ('ź', 'ź'),
+    ('ż', 'ż'),
+    ('ž', 'ƀ'),
+    ('ƃ', 'ƃ'),
+    ('ƅ', 'ƅ'),
+    ('ƈ', 'ƈ'),
+    ('ƌ', 'ƌ'),
+    ('ƒ', 'ƒ'),
+    ('ƕ', 'ƕ'),
+    ('ƙ', 'ƚ'),
+    ('ƞ', 'ƞ'),
+    ('ơ', 'ơ'),
+    ('ƣ', 'ƣ'),
+    ('ƥ', 'ƥ'),
+    ('ƨ', 'ƨ'),
+    ('ƭ', 'ƭ'),
+    ('ư', 'ư'),
+    ('ƴ', 'ƴ'),
+    ('ƶ', 'ƶ'),
+    ('ƹ', 'ƹ'),
+    ('ƽ', 'ƽ'),
+    ('ƿ', 'ƿ'),
+    ('DŽ', 'DŽ'),
+    ('dž', 'LJ'),
+    ('lj', 'NJ'),
+    ('nj', 'nj'),
+    ('ǎ', 'ǎ'),
+    ('ǐ', 'ǐ'),
+    ('ǒ', 'ǒ'),
+    ('ǔ', 'ǔ'),
+    ('ǖ', 'ǖ'),
+    ('ǘ', 'ǘ'),
+    ('ǚ', 'ǚ'),
+    ('ǜ', 'ǝ'),
+    ('ǟ', 'ǟ'),
+    ('ǡ', 'ǡ'),
+    ('ǣ', 'ǣ'),
+    ('ǥ', 'ǥ'),
+    ('ǧ', 'ǧ'),
+    ('ǩ', 'ǩ'),
+    ('ǫ', 'ǫ'),
+    ('ǭ', 'ǭ'),
+    ('ǯ', 'DZ'),
+    ('dz', 'dz'),
+    ('ǵ', 'ǵ'),
+    ('ǹ', 'ǹ'),
+    ('ǻ', 'ǻ'),
+    ('ǽ', 'ǽ'),
+    ('ǿ', 'ǿ'),
+    ('ȁ', 'ȁ'),
+    ('ȃ', 'ȃ'),
+    ('ȅ', 'ȅ'),
+    ('ȇ', 'ȇ'),
+    ('ȉ', 'ȉ'),
+    ('ȋ', 'ȋ'),
+    ('ȍ', 'ȍ'),
+    ('ȏ', 'ȏ'),
+    ('ȑ', 'ȑ'),
+    ('ȓ', 'ȓ'),
+    ('ȕ', 'ȕ'),
+    ('ȗ', 'ȗ'),
+    ('ș', 'ș'),
+    ('ț', 'ț'),
+    ('ȝ', 'ȝ'),
+    ('ȟ', 'ȟ'),
+    ('ȣ', 'ȣ'),
+    ('ȥ', 'ȥ'),
+    ('ȧ', 'ȧ'),
+    ('ȩ', 'ȩ'),
+    ('ȫ', 'ȫ'),
+    ('ȭ', 'ȭ'),
+    ('ȯ', 'ȯ'),
+    ('ȱ', 'ȱ'),
+    ('ȳ', 'ȳ'),
+    ('ȼ', 'ȼ'),
+    ('ȿ', 'ɀ'),
+    ('ɂ', 'ɂ'),
+    ('ɇ', 'ɇ'),
+    ('ɉ', 'ɉ'),
+    ('ɋ', 'ɋ'),
+    ('ɍ', 'ɍ'),
+    ('ɏ', 'ɔ'),
+    ('ɖ', 'ɗ'),
+    ('ə', 'ə'),
+    ('ɛ', 'ɜ'),
+    ('ɠ', 'ɡ'),
+    ('ɣ', 'ɣ'),
+    ('ɥ', 'ɦ'),
+    ('ɨ', 'ɬ'),
+    ('ɯ', 'ɯ'),
+    ('ɱ', 'ɲ'),
+    ('ɵ', 'ɵ'),
+    ('ɽ', 'ɽ'),
+    ('ʀ', 'ʀ'),
+    ('ʂ', 'ʃ'),
+    ('ʇ', 'ʌ'),
+    ('ʒ', 'ʒ'),
+    ('ʝ', 'ʞ'),
+    ('\u{345}', '\u{345}'),
+    ('ͱ', 'ͱ'),
+    ('ͳ', 'ͳ'),
+    ('ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('ΐ', 'ΐ'),
+    ('ά', 'ώ'),
+    ('ϐ', 'ϑ'),
+    ('ϕ', 'ϗ'),
+    ('ϙ', 'ϙ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('ϣ', 'ϣ'),
+    ('ϥ', 'ϥ'),
+    ('ϧ', 'ϧ'),
+    ('ϩ', 'ϩ'),
+    ('ϫ', 'ϫ'),
+    ('ϭ', 'ϭ'),
+    ('ϯ', 'ϳ'),
+    ('ϵ', 'ϵ'),
+    ('ϸ', 'ϸ'),
+    ('ϻ', 'ϻ'),
+    ('а', 'џ'),
+    ('ѡ', 'ѡ'),
+    ('ѣ', 'ѣ'),
+    ('ѥ', 'ѥ'),
+    ('ѧ', 'ѧ'),
+    ('ѩ', 'ѩ'),
+    ('ѫ', 'ѫ'),
+    ('ѭ', 'ѭ'),
+    ('ѯ', 'ѯ'),
+    ('ѱ', 'ѱ'),
+    ('ѳ', 'ѳ'),
+    ('ѵ', 'ѵ'),
+    ('ѷ', 'ѷ'),
+    ('ѹ', 'ѹ'),
+    ('ѻ', 'ѻ'),
+    ('ѽ', 'ѽ'),
+    ('ѿ', 'ѿ'),
+    ('ҁ', 'ҁ'),
+    ('ҋ', 'ҋ'),
+    ('ҍ', 'ҍ'),
+    ('ҏ', 'ҏ'),
+    ('ґ', 'ґ'),
+    ('ғ', 'ғ'),
+    ('ҕ', 'ҕ'),
+    ('җ', 'җ'),
+    ('ҙ', 'ҙ'),
+    ('қ', 'қ'),
+    ('ҝ', 'ҝ'),
+    ('ҟ', 'ҟ'),
+    ('ҡ', 'ҡ'),
+    ('ң', 'ң'),
+    ('ҥ', 'ҥ'),
+    ('ҧ', 'ҧ'),
+    ('ҩ', 'ҩ'),
+    ('ҫ', 'ҫ'),
+    ('ҭ', 'ҭ'),
+    ('ү', 'ү'),
+    ('ұ', 'ұ'),
+    ('ҳ', 'ҳ'),
+    ('ҵ', 'ҵ'),
+    ('ҷ', 'ҷ'),
+    ('ҹ', 'ҹ'),
+    ('һ', 'һ'),
+    ('ҽ', 'ҽ'),
+    ('ҿ', 'ҿ'),
+    ('ӂ', 'ӂ'),
+    ('ӄ', 'ӄ'),
+    ('ӆ', 'ӆ'),
+    ('ӈ', 'ӈ'),
+    ('ӊ', 'ӊ'),
+    ('ӌ', 'ӌ'),
+    ('ӎ', 'ӏ'),
+    ('ӑ', 'ӑ'),
+    ('ӓ', 'ӓ'),
+    ('ӕ', 'ӕ'),
+    ('ӗ', 'ӗ'),
+    ('ә', 'ә'),
+    ('ӛ', 'ӛ'),
+    ('ӝ', 'ӝ'),
+    ('ӟ', 'ӟ'),
+    ('ӡ', 'ӡ'),
+    ('ӣ', 'ӣ'),
+    ('ӥ', 'ӥ'),
+    ('ӧ', 'ӧ'),
+    ('ө', 'ө'),
+    ('ӫ', 'ӫ'),
+    ('ӭ', 'ӭ'),
+    ('ӯ', 'ӯ'),
+    ('ӱ', 'ӱ'),
+    ('ӳ', 'ӳ'),
+    ('ӵ', 'ӵ'),
+    ('ӷ', 'ӷ'),
+    ('ӹ', 'ӹ'),
+    ('ӻ', 'ӻ'),
+    ('ӽ', 'ӽ'),
+    ('ӿ', 'ӿ'),
+    ('ԁ', 'ԁ'),
+    ('ԃ', 'ԃ'),
+    ('ԅ', 'ԅ'),
+    ('ԇ', 'ԇ'),
+    ('ԉ', 'ԉ'),
+    ('ԋ', 'ԋ'),
+    ('ԍ', 'ԍ'),
+    ('ԏ', 'ԏ'),
+    ('ԑ', 'ԑ'),
+    ('ԓ', 'ԓ'),
+    ('ԕ', 'ԕ'),
+    ('ԗ', 'ԗ'),
+    ('ԙ', 'ԙ'),
+    ('ԛ', 'ԛ'),
+    ('ԝ', 'ԝ'),
+    ('ԟ', 'ԟ'),
+    ('ԡ', 'ԡ'),
+    ('ԣ', 'ԣ'),
+    ('ԥ', 'ԥ'),
+    ('ԧ', 'ԧ'),
+    ('ԩ', 'ԩ'),
+    ('ԫ', 'ԫ'),
+    ('ԭ', 'ԭ'),
+    ('ԯ', 'ԯ'),
+    ('ա', 'և'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᵹ', 'ᵹ'),
+    ('ᵽ', 'ᵽ'),
+    ('ᶎ', 'ᶎ'),
+    ('ḁ', 'ḁ'),
+    ('ḃ', 'ḃ'),
+    ('ḅ', 'ḅ'),
+    ('ḇ', 'ḇ'),
+    ('ḉ', 'ḉ'),
+    ('ḋ', 'ḋ'),
+    ('ḍ', 'ḍ'),
+    ('ḏ', 'ḏ'),
+    ('ḑ', 'ḑ'),
+    ('ḓ', 'ḓ'),
+    ('ḕ', 'ḕ'),
+    ('ḗ', 'ḗ'),
+    ('ḙ', 'ḙ'),
+    ('ḛ', 'ḛ'),
+    ('ḝ', 'ḝ'),
+    ('ḟ', 'ḟ'),
+    ('ḡ', 'ḡ'),
+    ('ḣ', 'ḣ'),
+    ('ḥ', 'ḥ'),
+    ('ḧ', 'ḧ'),
+    ('ḩ', 'ḩ'),
+    ('ḫ', 'ḫ'),
+    ('ḭ', 'ḭ'),
+    ('ḯ', 'ḯ'),
+    ('ḱ', 'ḱ'),
+    ('ḳ', 'ḳ'),
+    ('ḵ', 'ḵ'),
+    ('ḷ', 'ḷ'),
+    ('ḹ', 'ḹ'),
+    ('ḻ', 'ḻ'),
+    ('ḽ', 'ḽ'),
+    ('ḿ', 'ḿ'),
+    ('ṁ', 'ṁ'),
+    ('ṃ', 'ṃ'),
+    ('ṅ', 'ṅ'),
+    ('ṇ', 'ṇ'),
+    ('ṉ', 'ṉ'),
+    ('ṋ', 'ṋ'),
+    ('ṍ', 'ṍ'),
+    ('ṏ', 'ṏ'),
+    ('ṑ', 'ṑ'),
+    ('ṓ', 'ṓ'),
+    ('ṕ', 'ṕ'),
+    ('ṗ', 'ṗ'),
+    ('ṙ', 'ṙ'),
+    ('ṛ', 'ṛ'),
+    ('ṝ', 'ṝ'),
+    ('ṟ', 'ṟ'),
+    ('ṡ', 'ṡ'),
+    ('ṣ', 'ṣ'),
+    ('ṥ', 'ṥ'),
+    ('ṧ', 'ṧ'),
+    ('ṩ', 'ṩ'),
+    ('ṫ', 'ṫ'),
+    ('ṭ', 'ṭ'),
+    ('ṯ', 'ṯ'),
+    ('ṱ', 'ṱ'),
+    ('ṳ', 'ṳ'),
+    ('ṵ', 'ṵ'),
+    ('ṷ', 'ṷ'),
+    ('ṹ', 'ṹ'),
+    ('ṻ', 'ṻ'),
+    ('ṽ', 'ṽ'),
+    ('ṿ', 'ṿ'),
+    ('ẁ', 'ẁ'),
+    ('ẃ', 'ẃ'),
+    ('ẅ', 'ẅ'),
+    ('ẇ', 'ẇ'),
+    ('ẉ', 'ẉ'),
+    ('ẋ', 'ẋ'),
+    ('ẍ', 'ẍ'),
+    ('ẏ', 'ẏ'),
+    ('ẑ', 'ẑ'),
+    ('ẓ', 'ẓ'),
+    ('ẕ', 'ẛ'),
+    ('ạ', 'ạ'),
+    ('ả', 'ả'),
+    ('ấ', 'ấ'),
+    ('ầ', 'ầ'),
+    ('ẩ', 'ẩ'),
+    ('ẫ', 'ẫ'),
+    ('ậ', 'ậ'),
+    ('ắ', 'ắ'),
+    ('ằ', 'ằ'),
+    ('ẳ', 'ẳ'),
+    ('ẵ', 'ẵ'),
+    ('ặ', 'ặ'),
+    ('ẹ', 'ẹ'),
+    ('ẻ', 'ẻ'),
+    ('ẽ', 'ẽ'),
+    ('ế', 'ế'),
+    ('ề', 'ề'),
+    ('ể', 'ể'),
+    ('ễ', 'ễ'),
+    ('ệ', 'ệ'),
+    ('ỉ', 'ỉ'),
+    ('ị', 'ị'),
+    ('ọ', 'ọ'),
+    ('ỏ', 'ỏ'),
+    ('ố', 'ố'),
+    ('ồ', 'ồ'),
+    ('ổ', 'ổ'),
+    ('ỗ', 'ỗ'),
+    ('ộ', 'ộ'),
+    ('ớ', 'ớ'),
+    ('ờ', 'ờ'),
+    ('ở', 'ở'),
+    ('ỡ', 'ỡ'),
+    ('ợ', 'ợ'),
+    ('ụ', 'ụ'),
+    ('ủ', 'ủ'),
+    ('ứ', 'ứ'),
+    ('ừ', 'ừ'),
+    ('ử', 'ử'),
+    ('ữ', 'ữ'),
+    ('ự', 'ự'),
+    ('ỳ', 'ỳ'),
+    ('ỵ', 'ỵ'),
+    ('ỷ', 'ỷ'),
+    ('ỹ', 'ỹ'),
+    ('ỻ', 'ỻ'),
+    ('ỽ', 'ỽ'),
+    ('ỿ', 'ἇ'),
+    ('ἐ', 'ἕ'),
+    ('ἠ', 'ἧ'),
+    ('ἰ', 'ἷ'),
+    ('ὀ', 'ὅ'),
+    ('ὐ', 'ὗ'),
+    ('ὠ', 'ὧ'),
+    ('ὰ', 'ώ'),
+    ('ᾀ', 'ᾇ'),
+    ('ᾐ', 'ᾗ'),
+    ('ᾠ', 'ᾧ'),
+    ('ᾰ', 'ᾴ'),
+    ('ᾶ', 'ᾷ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῇ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'ῗ'),
+    ('ῠ', 'ῧ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῷ'),
+    ('ⅎ', 'ⅎ'),
+    ('ⅰ', 'ⅿ'),
+    ('ↄ', 'ↄ'),
+    ('ⓐ', 'ⓩ'),
+    ('ⰰ', 'ⱞ'),
+    ('ⱡ', 'ⱡ'),
+    ('ⱥ', 'ⱦ'),
+    ('ⱨ', 'ⱨ'),
+    ('ⱪ', 'ⱪ'),
+    ('ⱬ', 'ⱬ'),
+    ('ⱳ', 'ⱳ'),
+    ('ⱶ', 'ⱶ'),
+    ('ⲁ', 'ⲁ'),
+    ('ⲃ', 'ⲃ'),
+    ('ⲅ', 'ⲅ'),
+    ('ⲇ', 'ⲇ'),
+    ('ⲉ', 'ⲉ'),
+    ('ⲋ', 'ⲋ'),
+    ('ⲍ', 'ⲍ'),
+    ('ⲏ', 'ⲏ'),
+    ('ⲑ', 'ⲑ'),
+    ('ⲓ', 'ⲓ'),
+    ('ⲕ', 'ⲕ'),
+    ('ⲗ', 'ⲗ'),
+    ('ⲙ', 'ⲙ'),
+    ('ⲛ', 'ⲛ'),
+    ('ⲝ', 'ⲝ'),
+    ('ⲟ', 'ⲟ'),
+    ('ⲡ', 'ⲡ'),
+    ('ⲣ', 'ⲣ'),
+    ('ⲥ', 'ⲥ'),
+    ('ⲧ', 'ⲧ'),
+    ('ⲩ', 'ⲩ'),
+    ('ⲫ', 'ⲫ'),
+    ('ⲭ', 'ⲭ'),
+    ('ⲯ', 'ⲯ'),
+    ('ⲱ', 'ⲱ'),
+    ('ⲳ', 'ⲳ'),
+    ('ⲵ', 'ⲵ'),
+    ('ⲷ', 'ⲷ'),
+    ('ⲹ', 'ⲹ'),
+    ('ⲻ', 'ⲻ'),
+    ('ⲽ', 'ⲽ'),
+    ('ⲿ', 'ⲿ'),
+    ('ⳁ', 'ⳁ'),
+    ('ⳃ', 'ⳃ'),
+    ('ⳅ', 'ⳅ'),
+    ('ⳇ', 'ⳇ'),
+    ('ⳉ', 'ⳉ'),
+    ('ⳋ', 'ⳋ'),
+    ('ⳍ', 'ⳍ'),
+    ('ⳏ', 'ⳏ'),
+    ('ⳑ', 'ⳑ'),
+    ('ⳓ', 'ⳓ'),
+    ('ⳕ', 'ⳕ'),
+    ('ⳗ', 'ⳗ'),
+    ('ⳙ', 'ⳙ'),
+    ('ⳛ', 'ⳛ'),
+    ('ⳝ', 'ⳝ'),
+    ('ⳟ', 'ⳟ'),
+    ('ⳡ', 'ⳡ'),
+    ('ⳣ', 'ⳣ'),
+    ('ⳬ', 'ⳬ'),
+    ('ⳮ', 'ⳮ'),
+    ('ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ꙁ', 'ꙁ'),
+    ('ꙃ', 'ꙃ'),
+    ('ꙅ', 'ꙅ'),
+    ('ꙇ', 'ꙇ'),
+    ('ꙉ', 'ꙉ'),
+    ('ꙋ', 'ꙋ'),
+    ('ꙍ', 'ꙍ'),
+    ('ꙏ', 'ꙏ'),
+    ('ꙑ', 'ꙑ'),
+    ('ꙓ', 'ꙓ'),
+    ('ꙕ', 'ꙕ'),
+    ('ꙗ', 'ꙗ'),
+    ('ꙙ', 'ꙙ'),
+    ('ꙛ', 'ꙛ'),
+    ('ꙝ', 'ꙝ'),
+    ('ꙟ', 'ꙟ'),
+    ('ꙡ', 'ꙡ'),
+    ('ꙣ', 'ꙣ'),
+    ('ꙥ', 'ꙥ'),
+    ('ꙧ', 'ꙧ'),
+    ('ꙩ', 'ꙩ'),
+    ('ꙫ', 'ꙫ'),
+    ('ꙭ', 'ꙭ'),
+    ('ꚁ', 'ꚁ'),
+    ('ꚃ', 'ꚃ'),
+    ('ꚅ', 'ꚅ'),
+    ('ꚇ', 'ꚇ'),
+    ('ꚉ', 'ꚉ'),
+    ('ꚋ', 'ꚋ'),
+    ('ꚍ', 'ꚍ'),
+    ('ꚏ', 'ꚏ'),
+    ('ꚑ', 'ꚑ'),
+    ('ꚓ', 'ꚓ'),
+    ('ꚕ', 'ꚕ'),
+    ('ꚗ', 'ꚗ'),
+    ('ꚙ', 'ꚙ'),
+    ('ꚛ', 'ꚛ'),
+    ('ꜣ', 'ꜣ'),
+    ('ꜥ', 'ꜥ'),
+    ('ꜧ', 'ꜧ'),
+    ('ꜩ', 'ꜩ'),
+    ('ꜫ', 'ꜫ'),
+    ('ꜭ', 'ꜭ'),
+    ('ꜯ', 'ꜯ'),
+    ('ꜳ', 'ꜳ'),
+    ('ꜵ', 'ꜵ'),
+    ('ꜷ', 'ꜷ'),
+    ('ꜹ', 'ꜹ'),
+    ('ꜻ', 'ꜻ'),
+    ('ꜽ', 'ꜽ'),
+    ('ꜿ', 'ꜿ'),
+    ('ꝁ', 'ꝁ'),
+    ('ꝃ', 'ꝃ'),
+    ('ꝅ', 'ꝅ'),
+    ('ꝇ', 'ꝇ'),
+    ('ꝉ', 'ꝉ'),
+    ('ꝋ', 'ꝋ'),
+    ('ꝍ', 'ꝍ'),
+    ('ꝏ', 'ꝏ'),
+    ('ꝑ', 'ꝑ'),
+    ('ꝓ', 'ꝓ'),
+    ('ꝕ', 'ꝕ'),
+    ('ꝗ', 'ꝗ'),
+    ('ꝙ', 'ꝙ'),
+    ('ꝛ', 'ꝛ'),
+    ('ꝝ', 'ꝝ'),
+    ('ꝟ', 'ꝟ'),
+    ('ꝡ', 'ꝡ'),
+    ('ꝣ', 'ꝣ'),
+    ('ꝥ', 'ꝥ'),
+    ('ꝧ', 'ꝧ'),
+    ('ꝩ', 'ꝩ'),
+    ('ꝫ', 'ꝫ'),
+    ('ꝭ', 'ꝭ'),
+    ('ꝯ', 'ꝯ'),
+    ('ꝺ', 'ꝺ'),
+    ('ꝼ', 'ꝼ'),
+    ('ꝿ', 'ꝿ'),
+    ('ꞁ', 'ꞁ'),
+    ('ꞃ', 'ꞃ'),
+    ('ꞅ', 'ꞅ'),
+    ('ꞇ', 'ꞇ'),
+    ('ꞌ', 'ꞌ'),
+    ('ꞑ', 'ꞑ'),
+    ('ꞓ', 'ꞔ'),
+    ('ꞗ', 'ꞗ'),
+    ('ꞙ', 'ꞙ'),
+    ('ꞛ', 'ꞛ'),
+    ('ꞝ', 'ꞝ'),
+    ('ꞟ', 'ꞟ'),
+    ('ꞡ', 'ꞡ'),
+    ('ꞣ', 'ꞣ'),
+    ('ꞥ', 'ꞥ'),
+    ('ꞧ', 'ꞧ'),
+    ('ꞩ', 'ꞩ'),
+    ('ꞵ', 'ꞵ'),
+    ('ꞷ', 'ꞷ'),
+    ('ꞹ', 'ꞹ'),
+    ('ꞻ', 'ꞻ'),
+    ('ꞽ', 'ꞽ'),
+    ('ꞿ', 'ꞿ'),
+    ('ꟃ', 'ꟃ'),
+    ('\u{a7c8}', '\u{a7c8}'),
+    ('\u{a7ca}', '\u{a7ca}'),
+    ('\u{a7f6}', '\u{a7f6}'),
+    ('ꭓ', 'ꭓ'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('a', 'z'),
+    ('𐐨', '𐑏'),
+    ('𐓘', '𐓻'),
+    ('𐳀', '𐳲'),
+    ('𑣀', '𑣟'),
+    ('𖹠', '𖹿'),
+    ('𞤢', '𞥃'),
+];
+
+pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[
+    ('a', 'z'),
+    ('µ', 'µ'),
+    ('ß', 'ö'),
+    ('ø', 'ÿ'),
+    ('ā', 'ā'),
+    ('ă', 'ă'),
+    ('ą', 'ą'),
+    ('ć', 'ć'),
+    ('ĉ', 'ĉ'),
+    ('ċ', 'ċ'),
+    ('č', 'č'),
+    ('ď', 'ď'),
+    ('đ', 'đ'),
+    ('ē', 'ē'),
+    ('ĕ', 'ĕ'),
+    ('ė', 'ė'),
+    ('ę', 'ę'),
+    ('ě', 'ě'),
+    ('ĝ', 'ĝ'),
+    ('ğ', 'ğ'),
+    ('ġ', 'ġ'),
+    ('ģ', 'ģ'),
+    ('ĥ', 'ĥ'),
+    ('ħ', 'ħ'),
+    ('ĩ', 'ĩ'),
+    ('ī', 'ī'),
+    ('ĭ', 'ĭ'),
+    ('į', 'į'),
+    ('ı', 'ı'),
+    ('ij', 'ij'),
+    ('ĵ', 'ĵ'),
+    ('ķ', 'ķ'),
+    ('ĺ', 'ĺ'),
+    ('ļ', 'ļ'),
+    ('ľ', 'ľ'),
+    ('ŀ', 'ŀ'),
+    ('ł', 'ł'),
+    ('ń', 'ń'),
+    ('ņ', 'ņ'),
+    ('ň', 'ʼn'),
+    ('ŋ', 'ŋ'),
+    ('ō', 'ō'),
+    ('ŏ', 'ŏ'),
+    ('ő', 'ő'),
+    ('œ', 'œ'),
+    ('ŕ', 'ŕ'),
+    ('ŗ', 'ŗ'),
+    ('ř', 'ř'),
+    ('ś', 'ś'),
+    ('ŝ', 'ŝ'),
+    ('ş', 'ş'),
+    ('š', 'š'),
+    ('ţ', 'ţ'),
+    ('ť', 'ť'),
+    ('ŧ', 'ŧ'),
+    ('ũ', 'ũ'),
+    ('ū', 'ū'),
+    ('ŭ', 'ŭ'),
+    ('ů', 'ů'),
+    ('ű', 'ű'),
+    ('ų', 'ų'),
+    ('ŵ', 'ŵ'),
+    ('ŷ', 'ŷ'),
+    ('ź', 'ź'),
+    ('ż', 'ż'),
+    ('ž', 'ƀ'),
+    ('ƃ', 'ƃ'),
+    ('ƅ', 'ƅ'),
+    ('ƈ', 'ƈ'),
+    ('ƌ', 'ƌ'),
+    ('ƒ', 'ƒ'),
+    ('ƕ', 'ƕ'),
+    ('ƙ', 'ƚ'),
+    ('ƞ', 'ƞ'),
+    ('ơ', 'ơ'),
+    ('ƣ', 'ƣ'),
+    ('ƥ', 'ƥ'),
+    ('ƨ', 'ƨ'),
+    ('ƭ', 'ƭ'),
+    ('ư', 'ư'),
+    ('ƴ', 'ƴ'),
+    ('ƶ', 'ƶ'),
+    ('ƹ', 'ƹ'),
+    ('ƽ', 'ƽ'),
+    ('ƿ', 'ƿ'),
+    ('Dž', 'dž'),
+    ('Lj', 'lj'),
+    ('Nj', 'nj'),
+    ('ǎ', 'ǎ'),
+    ('ǐ', 'ǐ'),
+    ('ǒ', 'ǒ'),
+    ('ǔ', 'ǔ'),
+    ('ǖ', 'ǖ'),
+    ('ǘ', 'ǘ'),
+    ('ǚ', 'ǚ'),
+    ('ǜ', 'ǝ'),
+    ('ǟ', 'ǟ'),
+    ('ǡ', 'ǡ'),
+    ('ǣ', 'ǣ'),
+    ('ǥ', 'ǥ'),
+    ('ǧ', 'ǧ'),
+    ('ǩ', 'ǩ'),
+    ('ǫ', 'ǫ'),
+    ('ǭ', 'ǭ'),
+    ('ǯ', 'ǰ'),
+    ('Dz', 'dz'),
+    ('ǵ', 'ǵ'),
+    ('ǹ', 'ǹ'),
+    ('ǻ', 'ǻ'),
+    ('ǽ', 'ǽ'),
+    ('ǿ', 'ǿ'),
+    ('ȁ', 'ȁ'),
+    ('ȃ', 'ȃ'),
+    ('ȅ', 'ȅ'),
+    ('ȇ', 'ȇ'),
+    ('ȉ', 'ȉ'),
+    ('ȋ', 'ȋ'),
+    ('ȍ', 'ȍ'),
+    ('ȏ', 'ȏ'),
+    ('ȑ', 'ȑ'),
+    ('ȓ', 'ȓ'),
+    ('ȕ', 'ȕ'),
+    ('ȗ', 'ȗ'),
+    ('ș', 'ș'),
+    ('ț', 'ț'),
+    ('ȝ', 'ȝ'),
+    ('ȟ', 'ȟ'),
+    ('ȣ', 'ȣ'),
+    ('ȥ', 'ȥ'),
+    ('ȧ', 'ȧ'),
+    ('ȩ', 'ȩ'),
+    ('ȫ', 'ȫ'),
+    ('ȭ', 'ȭ'),
+    ('ȯ', 'ȯ'),
+    ('ȱ', 'ȱ'),
+    ('ȳ', 'ȳ'),
+    ('ȼ', 'ȼ'),
+    ('ȿ', 'ɀ'),
+    ('ɂ', 'ɂ'),
+    ('ɇ', 'ɇ'),
+    ('ɉ', 'ɉ'),
+    ('ɋ', 'ɋ'),
+    ('ɍ', 'ɍ'),
+    ('ɏ', 'ɔ'),
+    ('ɖ', 'ɗ'),
+    ('ə', 'ə'),
+    ('ɛ', 'ɜ'),
+    ('ɠ', 'ɡ'),
+    ('ɣ', 'ɣ'),
+    ('ɥ', 'ɦ'),
+    ('ɨ', 'ɬ'),
+    ('ɯ', 'ɯ'),
+    ('ɱ', 'ɲ'),
+    ('ɵ', 'ɵ'),
+    ('ɽ', 'ɽ'),
+    ('ʀ', 'ʀ'),
+    ('ʂ', 'ʃ'),
+    ('ʇ', 'ʌ'),
+    ('ʒ', 'ʒ'),
+    ('ʝ', 'ʞ'),
+    ('\u{345}', '\u{345}'),
+    ('ͱ', 'ͱ'),
+    ('ͳ', 'ͳ'),
+    ('ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('ΐ', 'ΐ'),
+    ('ά', 'ώ'),
+    ('ϐ', 'ϑ'),
+    ('ϕ', 'ϗ'),
+    ('ϙ', 'ϙ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('ϣ', 'ϣ'),
+    ('ϥ', 'ϥ'),
+    ('ϧ', 'ϧ'),
+    ('ϩ', 'ϩ'),
+    ('ϫ', 'ϫ'),
+    ('ϭ', 'ϭ'),
+    ('ϯ', 'ϳ'),
+    ('ϵ', 'ϵ'),
+    ('ϸ', 'ϸ'),
+    ('ϻ', 'ϻ'),
+    ('а', 'џ'),
+    ('ѡ', 'ѡ'),
+    ('ѣ', 'ѣ'),
+    ('ѥ', 'ѥ'),
+    ('ѧ', 'ѧ'),
+    ('ѩ', 'ѩ'),
+    ('ѫ', 'ѫ'),
+    ('ѭ', 'ѭ'),
+    ('ѯ', 'ѯ'),
+    ('ѱ', 'ѱ'),
+    ('ѳ', 'ѳ'),
+    ('ѵ', 'ѵ'),
+    ('ѷ', 'ѷ'),
+    ('ѹ', 'ѹ'),
+    ('ѻ', 'ѻ'),
+    ('ѽ', 'ѽ'),
+    ('ѿ', 'ѿ'),
+    ('ҁ', 'ҁ'),
+    ('ҋ', 'ҋ'),
+    ('ҍ', 'ҍ'),
+    ('ҏ', 'ҏ'),
+    ('ґ', 'ґ'),
+    ('ғ', 'ғ'),
+    ('ҕ', 'ҕ'),
+    ('җ', 'җ'),
+    ('ҙ', 'ҙ'),
+    ('қ', 'қ'),
+    ('ҝ', 'ҝ'),
+    ('ҟ', 'ҟ'),
+    ('ҡ', 'ҡ'),
+    ('ң', 'ң'),
+    ('ҥ', 'ҥ'),
+    ('ҧ', 'ҧ'),
+    ('ҩ', 'ҩ'),
+    ('ҫ', 'ҫ'),
+    ('ҭ', 'ҭ'),
+    ('ү', 'ү'),
+    ('ұ', 'ұ'),
+    ('ҳ', 'ҳ'),
+    ('ҵ', 'ҵ'),
+    ('ҷ', 'ҷ'),
+    ('ҹ', 'ҹ'),
+    ('һ', 'һ'),
+    ('ҽ', 'ҽ'),
+    ('ҿ', 'ҿ'),
+    ('ӂ', 'ӂ'),
+    ('ӄ', 'ӄ'),
+    ('ӆ', 'ӆ'),
+    ('ӈ', 'ӈ'),
+    ('ӊ', 'ӊ'),
+    ('ӌ', 'ӌ'),
+    ('ӎ', 'ӏ'),
+    ('ӑ', 'ӑ'),
+    ('ӓ', 'ӓ'),
+    ('ӕ', 'ӕ'),
+    ('ӗ', 'ӗ'),
+    ('ә', 'ә'),
+    ('ӛ', 'ӛ'),
+    ('ӝ', 'ӝ'),
+    ('ӟ', 'ӟ'),
+    ('ӡ', 'ӡ'),
+    ('ӣ', 'ӣ'),
+    ('ӥ', 'ӥ'),
+    ('ӧ', 'ӧ'),
+    ('ө', 'ө'),
+    ('ӫ', 'ӫ'),
+    ('ӭ', 'ӭ'),
+    ('ӯ', 'ӯ'),
+    ('ӱ', 'ӱ'),
+    ('ӳ', 'ӳ'),
+    ('ӵ', 'ӵ'),
+    ('ӷ', 'ӷ'),
+    ('ӹ', 'ӹ'),
+    ('ӻ', 'ӻ'),
+    ('ӽ', 'ӽ'),
+    ('ӿ', 'ӿ'),
+    ('ԁ', 'ԁ'),
+    ('ԃ', 'ԃ'),
+    ('ԅ', 'ԅ'),
+    ('ԇ', 'ԇ'),
+    ('ԉ', 'ԉ'),
+    ('ԋ', 'ԋ'),
+    ('ԍ', 'ԍ'),
+    ('ԏ', 'ԏ'),
+    ('ԑ', 'ԑ'),
+    ('ԓ', 'ԓ'),
+    ('ԕ', 'ԕ'),
+    ('ԗ', 'ԗ'),
+    ('ԙ', 'ԙ'),
+    ('ԛ', 'ԛ'),
+    ('ԝ', 'ԝ'),
+    ('ԟ', 'ԟ'),
+    ('ԡ', 'ԡ'),
+    ('ԣ', 'ԣ'),
+    ('ԥ', 'ԥ'),
+    ('ԧ', 'ԧ'),
+    ('ԩ', 'ԩ'),
+    ('ԫ', 'ԫ'),
+    ('ԭ', 'ԭ'),
+    ('ԯ', 'ԯ'),
+    ('ա', 'և'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᵹ', 'ᵹ'),
+    ('ᵽ', 'ᵽ'),
+    ('ᶎ', 'ᶎ'),
+    ('ḁ', 'ḁ'),
+    ('ḃ', 'ḃ'),
+    ('ḅ', 'ḅ'),
+    ('ḇ', 'ḇ'),
+    ('ḉ', 'ḉ'),
+    ('ḋ', 'ḋ'),
+    ('ḍ', 'ḍ'),
+    ('ḏ', 'ḏ'),
+    ('ḑ', 'ḑ'),
+    ('ḓ', 'ḓ'),
+    ('ḕ', 'ḕ'),
+    ('ḗ', 'ḗ'),
+    ('ḙ', 'ḙ'),
+    ('ḛ', 'ḛ'),
+    ('ḝ', 'ḝ'),
+    ('ḟ', 'ḟ'),
+    ('ḡ', 'ḡ'),
+    ('ḣ', 'ḣ'),
+    ('ḥ', 'ḥ'),
+    ('ḧ', 'ḧ'),
+    ('ḩ', 'ḩ'),
+    ('ḫ', 'ḫ'),
+    ('ḭ', 'ḭ'),
+    ('ḯ', 'ḯ'),
+    ('ḱ', 'ḱ'),
+    ('ḳ', 'ḳ'),
+    ('ḵ', 'ḵ'),
+    ('ḷ', 'ḷ'),
+    ('ḹ', 'ḹ'),
+    ('ḻ', 'ḻ'),
+    ('ḽ', 'ḽ'),
+    ('ḿ', 'ḿ'),
+    ('ṁ', 'ṁ'),
+    ('ṃ', 'ṃ'),
+    ('ṅ', 'ṅ'),
+    ('ṇ', 'ṇ'),
+    ('ṉ', 'ṉ'),
+    ('ṋ', 'ṋ'),
+    ('ṍ', 'ṍ'),
+    ('ṏ', 'ṏ'),
+    ('ṑ', 'ṑ'),
+    ('ṓ', 'ṓ'),
+    ('ṕ', 'ṕ'),
+    ('ṗ', 'ṗ'),
+    ('ṙ', 'ṙ'),
+    ('ṛ', 'ṛ'),
+    ('ṝ', 'ṝ'),
+    ('ṟ', 'ṟ'),
+    ('ṡ', 'ṡ'),
+    ('ṣ', 'ṣ'),
+    ('ṥ', 'ṥ'),
+    ('ṧ', 'ṧ'),
+    ('ṩ', 'ṩ'),
+    ('ṫ', 'ṫ'),
+    ('ṭ', 'ṭ'),
+    ('ṯ', 'ṯ'),
+    ('ṱ', 'ṱ'),
+    ('ṳ', 'ṳ'),
+    ('ṵ', 'ṵ'),
+    ('ṷ', 'ṷ'),
+    ('ṹ', 'ṹ'),
+    ('ṻ', 'ṻ'),
+    ('ṽ', 'ṽ'),
+    ('ṿ', 'ṿ'),
+    ('ẁ', 'ẁ'),
+    ('ẃ', 'ẃ'),
+    ('ẅ', 'ẅ'),
+    ('ẇ', 'ẇ'),
+    ('ẉ', 'ẉ'),
+    ('ẋ', 'ẋ'),
+    ('ẍ', 'ẍ'),
+    ('ẏ', 'ẏ'),
+    ('ẑ', 'ẑ'),
+    ('ẓ', 'ẓ'),
+    ('ẕ', 'ẛ'),
+    ('ạ', 'ạ'),
+    ('ả', 'ả'),
+    ('ấ', 'ấ'),
+    ('ầ', 'ầ'),
+    ('ẩ', 'ẩ'),
+    ('ẫ', 'ẫ'),
+    ('ậ', 'ậ'),
+    ('ắ', 'ắ'),
+    ('ằ', 'ằ'),
+    ('ẳ', 'ẳ'),
+    ('ẵ', 'ẵ'),
+    ('ặ', 'ặ'),
+    ('ẹ', 'ẹ'),
+    ('ẻ', 'ẻ'),
+    ('ẽ', 'ẽ'),
+    ('ế', 'ế'),
+    ('ề', 'ề'),
+    ('ể', 'ể'),
+    ('ễ', 'ễ'),
+    ('ệ', 'ệ'),
+    ('ỉ', 'ỉ'),
+    ('ị', 'ị'),
+    ('ọ', 'ọ'),
+    ('ỏ', 'ỏ'),
+    ('ố', 'ố'),
+    ('ồ', 'ồ'),
+    ('ổ', 'ổ'),
+    ('ỗ', 'ỗ'),
+    ('ộ', 'ộ'),
+    ('ớ', 'ớ'),
+    ('ờ', 'ờ'),
+    ('ở', 'ở'),
+    ('ỡ', 'ỡ'),
+    ('ợ', 'ợ'),
+    ('ụ', 'ụ'),
+    ('ủ', 'ủ'),
+    ('ứ', 'ứ'),
+    ('ừ', 'ừ'),
+    ('ử', 'ử'),
+    ('ữ', 'ữ'),
+    ('ự', 'ự'),
+    ('ỳ', 'ỳ'),
+    ('ỵ', 'ỵ'),
+    ('ỷ', 'ỷ'),
+    ('ỹ', 'ỹ'),
+    ('ỻ', 'ỻ'),
+    ('ỽ', 'ỽ'),
+    ('ỿ', 'ἇ'),
+    ('ἐ', 'ἕ'),
+    ('ἠ', 'ἧ'),
+    ('ἰ', 'ἷ'),
+    ('ὀ', 'ὅ'),
+    ('ὐ', 'ὗ'),
+    ('ὠ', 'ὧ'),
+    ('ὰ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾷ'),
+    ('ᾼ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῇ'),
+    ('ῌ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'ῗ'),
+    ('ῠ', 'ῧ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῷ'),
+    ('ῼ', 'ῼ'),
+    ('ⅎ', 'ⅎ'),
+    ('ⅰ', 'ⅿ'),
+    ('ↄ', 'ↄ'),
+    ('ⓐ', 'ⓩ'),
+    ('ⰰ', 'ⱞ'),
+    ('ⱡ', 'ⱡ'),
+    ('ⱥ', 'ⱦ'),
+    ('ⱨ', 'ⱨ'),
+    ('ⱪ', 'ⱪ'),
+    ('ⱬ', 'ⱬ'),
+    ('ⱳ', 'ⱳ'),
+    ('ⱶ', 'ⱶ'),
+    ('ⲁ', 'ⲁ'),
+    ('ⲃ', 'ⲃ'),
+    ('ⲅ', 'ⲅ'),
+    ('ⲇ', 'ⲇ'),
+    ('ⲉ', 'ⲉ'),
+    ('ⲋ', 'ⲋ'),
+    ('ⲍ', 'ⲍ'),
+    ('ⲏ', 'ⲏ'),
+    ('ⲑ', 'ⲑ'),
+    ('ⲓ', 'ⲓ'),
+    ('ⲕ', 'ⲕ'),
+    ('ⲗ', 'ⲗ'),
+    ('ⲙ', 'ⲙ'),
+    ('ⲛ', 'ⲛ'),
+    ('ⲝ', 'ⲝ'),
+    ('ⲟ', 'ⲟ'),
+    ('ⲡ', 'ⲡ'),
+    ('ⲣ', 'ⲣ'),
+    ('ⲥ', 'ⲥ'),
+    ('ⲧ', 'ⲧ'),
+    ('ⲩ', 'ⲩ'),
+    ('ⲫ', 'ⲫ'),
+    ('ⲭ', 'ⲭ'),
+    ('ⲯ', 'ⲯ'),
+    ('ⲱ', 'ⲱ'),
+    ('ⲳ', 'ⲳ'),
+    ('ⲵ', 'ⲵ'),
+    ('ⲷ', 'ⲷ'),
+    ('ⲹ', 'ⲹ'),
+    ('ⲻ', 'ⲻ'),
+    ('ⲽ', 'ⲽ'),
+    ('ⲿ', 'ⲿ'),
+    ('ⳁ', 'ⳁ'),
+    ('ⳃ', 'ⳃ'),
+    ('ⳅ', 'ⳅ'),
+    ('ⳇ', 'ⳇ'),
+    ('ⳉ', 'ⳉ'),
+    ('ⳋ', 'ⳋ'),
+    ('ⳍ', 'ⳍ'),
+    ('ⳏ', 'ⳏ'),
+    ('ⳑ', 'ⳑ'),
+    ('ⳓ', 'ⳓ'),
+    ('ⳕ', 'ⳕ'),
+    ('ⳗ', 'ⳗ'),
+    ('ⳙ', 'ⳙ'),
+    ('ⳛ', 'ⳛ'),
+    ('ⳝ', 'ⳝ'),
+    ('ⳟ', 'ⳟ'),
+    ('ⳡ', 'ⳡ'),
+    ('ⳣ', 'ⳣ'),
+    ('ⳬ', 'ⳬ'),
+    ('ⳮ', 'ⳮ'),
+    ('ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ꙁ', 'ꙁ'),
+    ('ꙃ', 'ꙃ'),
+    ('ꙅ', 'ꙅ'),
+    ('ꙇ', 'ꙇ'),
+    ('ꙉ', 'ꙉ'),
+    ('ꙋ', 'ꙋ'),
+    ('ꙍ', 'ꙍ'),
+    ('ꙏ', 'ꙏ'),
+    ('ꙑ', 'ꙑ'),
+    ('ꙓ', 'ꙓ'),
+    ('ꙕ', 'ꙕ'),
+    ('ꙗ', 'ꙗ'),
+    ('ꙙ', 'ꙙ'),
+    ('ꙛ', 'ꙛ'),
+    ('ꙝ', 'ꙝ'),
+    ('ꙟ', 'ꙟ'),
+    ('ꙡ', 'ꙡ'),
+    ('ꙣ', 'ꙣ'),
+    ('ꙥ', 'ꙥ'),
+    ('ꙧ', 'ꙧ'),
+    ('ꙩ', 'ꙩ'),
+    ('ꙫ', 'ꙫ'),
+    ('ꙭ', 'ꙭ'),
+    ('ꚁ', 'ꚁ'),
+    ('ꚃ', 'ꚃ'),
+    ('ꚅ', 'ꚅ'),
+    ('ꚇ', 'ꚇ'),
+    ('ꚉ', 'ꚉ'),
+    ('ꚋ', 'ꚋ'),
+    ('ꚍ', 'ꚍ'),
+    ('ꚏ', 'ꚏ'),
+    ('ꚑ', 'ꚑ'),
+    ('ꚓ', 'ꚓ'),
+    ('ꚕ', 'ꚕ'),
+    ('ꚗ', 'ꚗ'),
+    ('ꚙ', 'ꚙ'),
+    ('ꚛ', 'ꚛ'),
+    ('ꜣ', 'ꜣ'),
+    ('ꜥ', 'ꜥ'),
+    ('ꜧ', 'ꜧ'),
+    ('ꜩ', 'ꜩ'),
+    ('ꜫ', 'ꜫ'),
+    ('ꜭ', 'ꜭ'),
+    ('ꜯ', 'ꜯ'),
+    ('ꜳ', 'ꜳ'),
+    ('ꜵ', 'ꜵ'),
+    ('ꜷ', 'ꜷ'),
+    ('ꜹ', 'ꜹ'),
+    ('ꜻ', 'ꜻ'),
+    ('ꜽ', 'ꜽ'),
+    ('ꜿ', 'ꜿ'),
+    ('ꝁ', 'ꝁ'),
+    ('ꝃ', 'ꝃ'),
+    ('ꝅ', 'ꝅ'),
+    ('ꝇ', 'ꝇ'),
+    ('ꝉ', 'ꝉ'),
+    ('ꝋ', 'ꝋ'),
+    ('ꝍ', 'ꝍ'),
+    ('ꝏ', 'ꝏ'),
+    ('ꝑ', 'ꝑ'),
+    ('ꝓ', 'ꝓ'),
+    ('ꝕ', 'ꝕ'),
+    ('ꝗ', 'ꝗ'),
+    ('ꝙ', 'ꝙ'),
+    ('ꝛ', 'ꝛ'),
+    ('ꝝ', 'ꝝ'),
+    ('ꝟ', 'ꝟ'),
+    ('ꝡ', 'ꝡ'),
+    ('ꝣ', 'ꝣ'),
+    ('ꝥ', 'ꝥ'),
+    ('ꝧ', 'ꝧ'),
+    ('ꝩ', 'ꝩ'),
+    ('ꝫ', 'ꝫ'),
+    ('ꝭ', 'ꝭ'),
+    ('ꝯ', 'ꝯ'),
+    ('ꝺ', 'ꝺ'),
+    ('ꝼ', 'ꝼ'),
+    ('ꝿ', 'ꝿ'),
+    ('ꞁ', 'ꞁ'),
+    ('ꞃ', 'ꞃ'),
+    ('ꞅ', 'ꞅ'),
+    ('ꞇ', 'ꞇ'),
+    ('ꞌ', 'ꞌ'),
+    ('ꞑ', 'ꞑ'),
+    ('ꞓ', 'ꞔ'),
+    ('ꞗ', 'ꞗ'),
+    ('ꞙ', 'ꞙ'),
+    ('ꞛ', 'ꞛ'),
+    ('ꞝ', 'ꞝ'),
+    ('ꞟ', 'ꞟ'),
+    ('ꞡ', 'ꞡ'),
+    ('ꞣ', 'ꞣ'),
+    ('ꞥ', 'ꞥ'),
+    ('ꞧ', 'ꞧ'),
+    ('ꞩ', 'ꞩ'),
+    ('ꞵ', 'ꞵ'),
+    ('ꞷ', 'ꞷ'),
+    ('ꞹ', 'ꞹ'),
+    ('ꞻ', 'ꞻ'),
+    ('ꞽ', 'ꞽ'),
+    ('ꞿ', 'ꞿ'),
+    ('ꟃ', 'ꟃ'),
+    ('\u{a7c8}', '\u{a7c8}'),
+    ('\u{a7ca}', '\u{a7ca}'),
+    ('\u{a7f6}', '\u{a7f6}'),
+    ('ꭓ', 'ꭓ'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('a', 'z'),
+    ('𐐨', '𐑏'),
+    ('𐓘', '𐓻'),
+    ('𐳀', '𐳲'),
+    ('𑣀', '𑣟'),
+    ('𖹠', '𖹿'),
+    ('𞤢', '𞥃'),
+];
+
+pub const DASH: &'static [(char, char)] = &[
+    ('-', '-'),
+    ('֊', '֊'),
+    ('־', '־'),
+    ('᐀', '᐀'),
+    ('᠆', '᠆'),
+    ('‐', '―'),
+    ('⁓', '⁓'),
+    ('⁻', '⁻'),
+    ('₋', '₋'),
+    ('−', '−'),
+    ('⸗', '⸗'),
+    ('⸚', '⸚'),
+    ('⸺', '⸻'),
+    ('⹀', '⹀'),
+    ('〜', '〜'),
+    ('〰', '〰'),
+    ('゠', '゠'),
+    ('︱', '︲'),
+    ('﹘', '﹘'),
+    ('﹣', '﹣'),
+    ('-', '-'),
+    ('\u{10ead}', '\u{10ead}'),
+];
+
+pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+    ('\u{ad}', '\u{ad}'),
+    ('\u{34f}', '\u{34f}'),
+    ('\u{61c}', '\u{61c}'),
+    ('ᅟ', 'ᅠ'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{180b}', '\u{180e}'),
+    ('\u{200b}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{206f}'),
+    ('ㅤ', 'ㅤ'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('ᅠ', 'ᅠ'),
+    ('\u{fff0}', '\u{fff8}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0000}', '\u{e0fff}'),
+];
+
+pub const DEPRECATED: &'static [(char, char)] = &[
+    ('ʼn', 'ʼn'),
+    ('ٳ', 'ٳ'),
+    ('\u{f77}', '\u{f77}'),
+    ('\u{f79}', '\u{f79}'),
+    ('ឣ', 'ឤ'),
+    ('\u{206a}', '\u{206f}'),
+    ('⟨', '⟩'),
+    ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const DIACRITIC: &'static [(char, char)] = &[
+    ('^', '^'),
+    ('`', '`'),
+    ('¨', '¨'),
+    ('¯', '¯'),
+    ('´', '´'),
+    ('·', '¸'),
+    ('ʰ', '\u{34e}'),
+    ('\u{350}', '\u{357}'),
+    ('\u{35d}', '\u{362}'),
+    ('ʹ', '͵'),
+    ('ͺ', 'ͺ'),
+    ('΄', '΅'),
+    ('\u{483}', '\u{487}'),
+    ('ՙ', 'ՙ'),
+    ('\u{591}', '\u{5a1}'),
+    ('\u{5a3}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c4}'),
+    ('\u{64b}', '\u{652}'),
+    ('\u{657}', '\u{658}'),
+    ('\u{6df}', '\u{6e0}'),
+    ('ۥ', 'ۦ'),
+    ('\u{6ea}', '\u{6ec}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', 'ߵ'),
+    ('\u{818}', '\u{819}'),
+    ('\u{8e3}', '\u{8fe}'),
+    ('\u{93c}', '\u{93c}'),
+    ('\u{94d}', '\u{94d}'),
+    ('\u{951}', '\u{954}'),
+    ('ॱ', 'ॱ'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('\u{a4d}', '\u{a4d}'),
+    ('\u{abc}', '\u{abc}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{afd}', '\u{aff}'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{b55}', '\u{b55}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{c4d}', '\u{c4d}'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('\u{ccd}', '\u{ccd}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{e47}', '\u{e4c}'),
+    ('\u{e4e}', '\u{e4e}'),
+    ('\u{eba}', '\u{eba}'),
+    ('\u{ec8}', '\u{ecc}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', '༿'),
+    ('\u{f82}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('\u{1037}', '\u{1037}'),
+    ('\u{1039}', '\u{103a}'),
+    ('ၣ', 'ၤ'),
+    ('ၩ', 'ၭ'),
+    ('ႇ', '\u{108d}'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', 'ႛ'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{17c9}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{1939}', '\u{193b}'),
+    ('\u{1a75}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1abd}'),
+    ('\u{1b34}', '\u{1b34}'),
+    ('᭄', '᭄'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('᮪', '\u{1bab}'),
+    ('\u{1c36}', '\u{1c37}'),
+    ('ᱸ', 'ᱽ'),
+    ('\u{1cd0}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('᳷', '\u{1cf9}'),
+    ('ᴬ', 'ᵪ'),
+    ('\u{1dc4}', '\u{1dcf}'),
+    ('\u{1df5}', '\u{1df9}'),
+    ('\u{1dfd}', '\u{1dff}'),
+    ('᾽', '᾽'),
+    ('᾿', '῁'),
+    ('῍', '῏'),
+    ('῝', '῟'),
+    ('῭', '`'),
+    ('´', '῾'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('ⸯ', 'ⸯ'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '゜'),
+    ('ー', 'ー'),
+    ('\u{a66f}', '\u{a66f}'),
+    ('\u{a67c}', '\u{a67d}'),
+    ('ꙿ', 'ꙿ'),
+    ('ꚜ', 'ꚝ'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('꜀', '꜡'),
+    ('ꞈ', '꞊'),
+    ('ꟸ', 'ꟹ'),
+    ('\u{a8c4}', '\u{a8c4}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a92b}', '꤮'),
+    ('꥓', '꥓'),
+    ('\u{a9b3}', '\u{a9b3}'),
+    ('꧀', '꧀'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('ꩻ', 'ꩽ'),
+    ('\u{aabf}', 'ꫂ'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('꭛', 'ꭟ'),
+    ('\u{ab69}', '\u{ab6b}'),
+    ('꯬', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('^', '^'),
+    ('`', '`'),
+    ('ー', 'ー'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    (' ̄', ' ̄'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('𐴢', '\u{10d27}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('\u{110b9}', '\u{110ba}'),
+    ('\u{11133}', '\u{11134}'),
+    ('\u{11173}', '\u{11173}'),
+    ('𑇀', '𑇀'),
+    ('\u{111ca}', '\u{111cc}'),
+    ('𑈵', '\u{11236}'),
+    ('\u{112e9}', '\u{112ea}'),
+    ('\u{1133c}', '\u{1133c}'),
+    ('𑍍', '𑍍'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('\u{11442}', '\u{11442}'),
+    ('\u{11446}', '\u{11446}'),
+    ('\u{114c2}', '\u{114c3}'),
+    ('\u{115bf}', '\u{115c0}'),
+    ('\u{1163f}', '\u{1163f}'),
+    ('𑚶', '\u{116b7}'),
+    ('\u{1172b}', '\u{1172b}'),
+    ('\u{11839}', '\u{1183a}'),
+    ('\u{1193d}', '\u{1193e}'),
+    ('\u{11943}', '\u{11943}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a34}', '\u{11a34}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a99}', '\u{11a99}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11d42}', '\u{11d42}'),
+    ('\u{11d44}', '\u{11d45}'),
+    ('\u{11d97}', '\u{11d97}'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f8f}', '𖾟'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e946}'),
+    ('\u{1e948}', '\u{1e94a}'),
+];
+
+pub const EMOJI: &'static [(char, char)] = &[
+    ('#', '#'),
+    ('*', '*'),
+    ('0', '9'),
+    ('©', '©'),
+    ('®', '®'),
+    ('‼', '‼'),
+    ('⁉', '⁉'),
+    ('™', '™'),
+    ('ℹ', 'ℹ'),
+    ('↔', '↙'),
+    ('↩', '↪'),
+    ('⌚', '⌛'),
+    ('⌨', '⌨'),
+    ('⏏', '⏏'),
+    ('⏩', '⏳'),
+    ('⏸', '⏺'),
+    ('Ⓜ', 'Ⓜ'),
+    ('▪', '▫'),
+    ('▶', '▶'),
+    ('◀', '◀'),
+    ('◻', '◾'),
+    ('☀', '☄'),
+    ('☎', '☎'),
+    ('☑', '☑'),
+    ('☔', '☕'),
+    ('☘', '☘'),
+    ('☝', '☝'),
+    ('☠', '☠'),
+    ('☢', '☣'),
+    ('☦', '☦'),
+    ('☪', '☪'),
+    ('☮', '☯'),
+    ('☸', '☺'),
+    ('♀', '♀'),
+    ('♂', '♂'),
+    ('♈', '♓'),
+    ('♟', '♠'),
+    ('♣', '♣'),
+    ('♥', '♦'),
+    ('♨', '♨'),
+    ('♻', '♻'),
+    ('♾', '♿'),
+    ('⚒', '⚗'),
+    ('⚙', '⚙'),
+    ('⚛', '⚜'),
+    ('⚠', '⚡'),
+    ('⚧', '⚧'),
+    ('⚪', '⚫'),
+    ('⚰', '⚱'),
+    ('⚽', '⚾'),
+    ('⛄', '⛅'),
+    ('⛈', '⛈'),
+    ('⛎', '⛏'),
+    ('⛑', '⛑'),
+    ('⛓', '⛔'),
+    ('⛩', '⛪'),
+    ('⛰', '⛵'),
+    ('⛷', '⛺'),
+    ('⛽', '⛽'),
+    ('✂', '✂'),
+    ('✅', '✅'),
+    ('✈', '✍'),
+    ('✏', '✏'),
+    ('✒', '✒'),
+    ('✔', '✔'),
+    ('✖', '✖'),
+    ('✝', '✝'),
+    ('✡', '✡'),
+    ('✨', '✨'),
+    ('✳', '✴'),
+    ('❄', '❄'),
+    ('❇', '❇'),
+    ('❌', '❌'),
+    ('❎', '❎'),
+    ('❓', '❕'),
+    ('❗', '❗'),
+    ('❣', '❤'),
+    ('➕', '➗'),
+    ('➡', '➡'),
+    ('➰', '➰'),
+    ('➿', '➿'),
+    ('⤴', '⤵'),
+    ('⬅', '⬇'),
+    ('⬛', '⬜'),
+    ('⭐', '⭐'),
+    ('⭕', '⭕'),
+    ('〰', '〰'),
+    ('〽', '〽'),
+    ('㊗', '㊗'),
+    ('㊙', '㊙'),
+    ('🀄', '🀄'),
+    ('🃏', '🃏'),
+    ('🅰', '🅱'),
+    ('🅾', '🅿'),
+    ('🆎', '🆎'),
+    ('🆑', '🆚'),
+    ('🇦', '🇿'),
+    ('🈁', '🈂'),
+    ('🈚', '🈚'),
+    ('🈯', '🈯'),
+    ('🈲', '🈺'),
+    ('🉐', '🉑'),
+    ('🌀', '🌡'),
+    ('🌤', '🎓'),
+    ('🎖', '🎗'),
+    ('🎙', '🎛'),
+    ('🎞', '🏰'),
+    ('🏳', '🏵'),
+    ('🏷', '📽'),
+    ('📿', '🔽'),
+    ('🕉', '🕎'),
+    ('🕐', '🕧'),
+    ('🕯', '🕰'),
+    ('🕳', '🕺'),
+    ('🖇', '🖇'),
+    ('🖊', '🖍'),
+    ('🖐', '🖐'),
+    ('🖕', '🖖'),
+    ('🖤', '🖥'),
+    ('🖨', '🖨'),
+    ('🖱', '🖲'),
+    ('🖼', '🖼'),
+    ('🗂', '🗄'),
+    ('🗑', '🗓'),
+    ('🗜', '🗞'),
+    ('🗡', '🗡'),
+    ('🗣', '🗣'),
+    ('🗨', '🗨'),
+    ('🗯', '🗯'),
+    ('🗳', '🗳'),
+    ('🗺', '🙏'),
+    ('🚀', '🛅'),
+    ('🛋', '🛒'),
+    ('🛕', '\u{1f6d7}'),
+    ('🛠', '🛥'),
+    ('🛩', '🛩'),
+    ('🛫', '🛬'),
+    ('🛰', '🛰'),
+    ('🛳', '\u{1f6fc}'),
+    ('🟠', '🟫'),
+    ('\u{1f90c}', '🤺'),
+    ('🤼', '🥅'),
+    ('🥇', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🧿'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+];
+
+pub const EMOJI_COMPONENT: &'static [(char, char)] = &[
+    ('#', '#'),
+    ('*', '*'),
+    ('0', '9'),
+    ('\u{200d}', '\u{200d}'),
+    ('\u{20e3}', '\u{20e3}'),
+    ('\u{fe0f}', '\u{fe0f}'),
+    ('🇦', '🇿'),
+    ('🏻', '🏿'),
+    ('🦰', '🦳'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const EMOJI_MODIFIER: &'static [(char, char)] = &[('🏻', '🏿')];
+
+pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[
+    ('☝', '☝'),
+    ('⛹', '⛹'),
+    ('✊', '✍'),
+    ('🎅', '🎅'),
+    ('🏂', '🏄'),
+    ('🏇', '🏇'),
+    ('🏊', '🏌'),
+    ('👂', '👃'),
+    ('👆', '👐'),
+    ('👦', '👸'),
+    ('👼', '👼'),
+    ('💁', '💃'),
+    ('💅', '💇'),
+    ('💏', '💏'),
+    ('💑', '💑'),
+    ('💪', '💪'),
+    ('🕴', '🕵'),
+    ('🕺', '🕺'),
+    ('🖐', '🖐'),
+    ('🖕', '🖖'),
+    ('🙅', '🙇'),
+    ('🙋', '🙏'),
+    ('🚣', '🚣'),
+    ('🚴', '🚶'),
+    ('🛀', '🛀'),
+    ('🛌', '🛌'),
+    ('\u{1f90c}', '\u{1f90c}'),
+    ('🤏', '🤏'),
+    ('🤘', '🤟'),
+    ('🤦', '🤦'),
+    ('🤰', '🤹'),
+    ('🤼', '🤾'),
+    ('\u{1f977}', '\u{1f977}'),
+    ('🦵', '🦶'),
+    ('🦸', '🦹'),
+    ('🦻', '🦻'),
+    ('🧍', '🧏'),
+    ('🧑', '🧝'),
+];
+
+pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[
+    ('⌚', '⌛'),
+    ('⏩', '⏬'),
+    ('⏰', '⏰'),
+    ('⏳', '⏳'),
+    ('◽', '◾'),
+    ('☔', '☕'),
+    ('♈', '♓'),
+    ('♿', '♿'),
+    ('⚓', '⚓'),
+    ('⚡', '⚡'),
+    ('⚪', '⚫'),
+    ('⚽', '⚾'),
+    ('⛄', '⛅'),
+    ('⛎', '⛎'),
+    ('⛔', '⛔'),
+    ('⛪', '⛪'),
+    ('⛲', '⛳'),
+    ('⛵', '⛵'),
+    ('⛺', '⛺'),
+    ('⛽', '⛽'),
+    ('✅', '✅'),
+    ('✊', '✋'),
+    ('✨', '✨'),
+    ('❌', '❌'),
+    ('❎', '❎'),
+    ('❓', '❕'),
+    ('❗', '❗'),
+    ('➕', '➗'),
+    ('➰', '➰'),
+    ('➿', '➿'),
+    ('⬛', '⬜'),
+    ('⭐', '⭐'),
+    ('⭕', '⭕'),
+    ('🀄', '🀄'),
+    ('🃏', '🃏'),
+    ('🆎', '🆎'),
+    ('🆑', '🆚'),
+    ('🇦', '🇿'),
+    ('🈁', '🈁'),
+    ('🈚', '🈚'),
+    ('🈯', '🈯'),
+    ('🈲', '🈶'),
+    ('🈸', '🈺'),
+    ('🉐', '🉑'),
+    ('🌀', '🌠'),
+    ('🌭', '🌵'),
+    ('🌷', '🍼'),
+    ('🍾', '🎓'),
+    ('🎠', '🏊'),
+    ('🏏', '🏓'),
+    ('🏠', '🏰'),
+    ('🏴', '🏴'),
+    ('🏸', '🐾'),
+    ('👀', '👀'),
+    ('👂', '📼'),
+    ('📿', '🔽'),
+    ('🕋', '🕎'),
+    ('🕐', '🕧'),
+    ('🕺', '🕺'),
+    ('🖕', '🖖'),
+    ('🖤', '🖤'),
+    ('🗻', '🙏'),
+    ('🚀', '🛅'),
+    ('🛌', '🛌'),
+    ('🛐', '🛒'),
+    ('🛕', '\u{1f6d7}'),
+    ('🛫', '🛬'),
+    ('🛴', '\u{1f6fc}'),
+    ('🟠', '🟫'),
+    ('\u{1f90c}', '🤺'),
+    ('🤼', '🥅'),
+    ('🥇', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🧿'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+];
+
+pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[
+    ('©', '©'),
+    ('®', '®'),
+    ('‼', '‼'),
+    ('⁉', '⁉'),
+    ('™', '™'),
+    ('ℹ', 'ℹ'),
+    ('↔', '↙'),
+    ('↩', '↪'),
+    ('⌚', '⌛'),
+    ('⌨', '⌨'),
+    ('⎈', '⎈'),
+    ('⏏', '⏏'),
+    ('⏩', '⏳'),
+    ('⏸', '⏺'),
+    ('Ⓜ', 'Ⓜ'),
+    ('▪', '▫'),
+    ('▶', '▶'),
+    ('◀', '◀'),
+    ('◻', '◾'),
+    ('☀', '★'),
+    ('☇', '☒'),
+    ('☔', '⚅'),
+    ('⚐', '✅'),
+    ('✈', '✒'),
+    ('✔', '✔'),
+    ('✖', '✖'),
+    ('✝', '✝'),
+    ('✡', '✡'),
+    ('✨', '✨'),
+    ('✳', '✴'),
+    ('❄', '❄'),
+    ('❇', '❇'),
+    ('❌', '❌'),
+    ('❎', '❎'),
+    ('❓', '❕'),
+    ('❗', '❗'),
+    ('❣', '❧'),
+    ('➕', '➗'),
+    ('➡', '➡'),
+    ('➰', '➰'),
+    ('➿', '➿'),
+    ('⤴', '⤵'),
+    ('⬅', '⬇'),
+    ('⬛', '⬜'),
+    ('⭐', '⭐'),
+    ('⭕', '⭕'),
+    ('〰', '〰'),
+    ('〽', '〽'),
+    ('㊗', '㊗'),
+    ('㊙', '㊙'),
+    ('🀀', '\u{1f0ff}'),
+    ('\u{1f10d}', '\u{1f10f}'),
+    ('🄯', '🄯'),
+    ('🅬', '🅱'),
+    ('🅾', '🅿'),
+    ('🆎', '🆎'),
+    ('🆑', '🆚'),
+    ('\u{1f1ad}', '\u{1f1e5}'),
+    ('🈁', '\u{1f20f}'),
+    ('🈚', '🈚'),
+    ('🈯', '🈯'),
+    ('🈲', '🈺'),
+    ('\u{1f23c}', '\u{1f23f}'),
+    ('\u{1f249}', '🏺'),
+    ('🐀', '🔽'),
+    ('🕆', '🙏'),
+    ('🚀', '\u{1f6ff}'),
+    ('\u{1f774}', '\u{1f77f}'),
+    ('🟕', '\u{1f7ff}'),
+    ('\u{1f80c}', '\u{1f80f}'),
+    ('\u{1f848}', '\u{1f84f}'),
+    ('\u{1f85a}', '\u{1f85f}'),
+    ('\u{1f888}', '\u{1f88f}'),
+    ('\u{1f8ae}', '\u{1f8ff}'),
+    ('\u{1f90c}', '🤺'),
+    ('🤼', '🥅'),
+    ('🥇', '\u{1faff}'),
+    ('\u{1fc00}', '\u{1fffd}'),
+];
+
+pub const EXTENDER: &'static [(char, char)] = &[
+    ('·', '·'),
+    ('ː', 'ˑ'),
+    ('ـ', 'ـ'),
+    ('ߺ', 'ߺ'),
+    ('\u{b55}', '\u{b55}'),
+    ('ๆ', 'ๆ'),
+    ('ໆ', 'ໆ'),
+    ('᠊', '᠊'),
+    ('ᡃ', 'ᡃ'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1c36}', '\u{1c36}'),
+    ('ᱻ', 'ᱻ'),
+    ('々', '々'),
+    ('〱', '〵'),
+    ('ゝ', 'ゞ'),
+    ('ー', 'ヾ'),
+    ('ꀕ', 'ꀕ'),
+    ('ꘌ', 'ꘌ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧦ', 'ꧦ'),
+    ('ꩰ', 'ꩰ'),
+    ('ꫝ', 'ꫝ'),
+    ('ꫳ', 'ꫴ'),
+    ('ー', 'ー'),
+    ('𑍝', '𑍝'),
+    ('𑗆', '𑗈'),
+    ('\u{11a98}', '\u{11a98}'),
+    ('𖭂', '𖭃'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𞄼', '𞄽'),
+    ('\u{1e944}', '\u{1e946}'),
+];
+
+pub const GRAPHEME_BASE: &'static [(char, char)] = &[
+    (' ', '~'),
+    ('\u{a0}', '¬'),
+    ('®', '˿'),
+    ('Ͱ', 'ͷ'),
+    ('ͺ', 'Ϳ'),
+    ('΄', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', '҂'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', '֊'),
+    ('֍', '֏'),
+    ('־', '־'),
+    ('׀', '׀'),
+    ('׃', '׃'),
+    ('׆', '׆'),
+    ('א', 'ת'),
+    ('ׯ', '״'),
+    ('؆', '؏'),
+    ('؛', '؛'),
+    ('؞', 'ي'),
+    ('٠', 'ٯ'),
+    ('ٱ', 'ە'),
+    ('۞', '۞'),
+    ('ۥ', 'ۦ'),
+    ('۩', '۩'),
+    ('ۮ', '܍'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('߀', 'ߪ'),
+    ('ߴ', 'ߺ'),
+    ('߾', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('࠰', '࠾'),
+    ('ࡀ', 'ࡘ'),
+    ('࡞', '࡞'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ः', 'ह'),
+    ('ऻ', 'ऻ'),
+    ('ऽ', 'ी'),
+    ('ॉ', 'ौ'),
+    ('ॎ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('।', 'ঀ'),
+    ('ং', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ি', 'ী'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৌ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('০', '৽'),
+    ('ਃ', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਾ', 'ੀ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '੯'),
+    ('ੲ', 'ੴ'),
+    ('੶', '੶'),
+    ('ઃ', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ી'),
+    ('ૉ', 'ૉ'),
+    ('ો', 'ૌ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('૦', '૱'),
+    ('ૹ', 'ૹ'),
+    ('ଂ', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ୀ', 'ୀ'),
+    ('େ', 'ୈ'),
+    ('ୋ', 'ୌ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('୦', '୷'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ி', 'ி'),
+    ('ு', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', 'ௌ'),
+    ('ௐ', 'ௐ'),
+    ('௦', '௺'),
+    ('ఁ', 'ః'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ు', 'ౄ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('౦', '౯'),
+    ('౷', 'ಀ'),
+    ('ಂ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಾ'),
+    ('ೀ', 'ು'),
+    ('ೃ', 'ೄ'),
+    ('ೇ', 'ೈ'),
+    ('ೊ', 'ೋ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+    ('ം', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ി', 'ീ'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൌ'),
+    ('ൎ', '൏'),
+    ('ൔ', 'ൖ'),
+    ('൘', 'ൡ'),
+    ('൦', 'ൿ'),
+    ('ං', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ැ', 'ෑ'),
+    ('ෘ', 'ෞ'),
+    ('෦', '෯'),
+    ('ෲ', '෴'),
+    ('ก', 'ะ'),
+    ('า', 'ำ'),
+    ('฿', 'ๆ'),
+    ('๏', '๛'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'ຳ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', '༗'),
+    ('༚', '༴'),
+    ('༶', '༶'),
+    ('༸', '༸'),
+    ('༺', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ཿ', 'ཿ'),
+    ('྅', '྅'),
+    ('ྈ', 'ྌ'),
+    ('྾', '࿅'),
+    ('࿇', '࿌'),
+    ('࿎', '࿚'),
+    ('က', 'ာ'),
+    ('ေ', 'ေ'),
+    ('း', 'း'),
+    ('ျ', 'ြ'),
+    ('ဿ', 'ၗ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႃ', 'ႄ'),
+    ('ႇ', 'ႌ'),
+    ('ႎ', 'ႜ'),
+    ('႞', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('፠', '፼'),
+    ('ᎀ', '᎙'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('᐀', '᚜'),
+    ('ᚠ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('᜵', '᜶'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ា', 'ា'),
+    ('ើ', 'ៅ'),
+    ('ះ', 'ៈ'),
+    ('។', 'ៜ'),
+    ('០', '៩'),
+    ('៰', '៹'),
+    ('᠀', '᠊'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢄ'),
+    ('ᢇ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᤣ', 'ᤦ'),
+    ('ᤩ', 'ᤫ'),
+    ('ᤰ', 'ᤱ'),
+    ('ᤳ', 'ᤸ'),
+    ('᥀', '᥀'),
+    ('᥄', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('᧐', '᧚'),
+    ('᧞', 'ᨖ'),
+    ('ᨙ', 'ᨚ'),
+    ('᨞', 'ᩕ'),
+    ('ᩗ', 'ᩗ'),
+    ('ᩡ', 'ᩡ'),
+    ('ᩣ', 'ᩤ'),
+    ('ᩭ', 'ᩲ'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᪠', '᪭'),
+    ('ᬄ', 'ᬳ'),
+    ('ᬻ', 'ᬻ'),
+    ('ᬽ', 'ᭁ'),
+    ('ᭃ', 'ᭋ'),
+    ('᭐', '᭪'),
+    ('᭴', '᭼'),
+    ('ᮂ', 'ᮡ'),
+    ('ᮦ', 'ᮧ'),
+    ('᮪', '᮪'),
+    ('ᮮ', 'ᯥ'),
+    ('ᯧ', 'ᯧ'),
+    ('ᯪ', 'ᯬ'),
+    ('ᯮ', 'ᯮ'),
+    ('᯲', '᯳'),
+    ('᯼', 'ᰫ'),
+    ('ᰴ', 'ᰵ'),
+    ('᰻', '᱉'),
+    ('ᱍ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', '᳇'),
+    ('᳓', '᳓'),
+    ('᳡', '᳡'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', '᳷'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ῄ'),
+    ('ῆ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('῝', '`'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', '῾'),
+    ('\u{2000}', '\u{200a}'),
+    ('‐', '‧'),
+    ('\u{202f}', '\u{205f}'),
+    ('⁰', 'ⁱ'),
+    ('⁴', '₎'),
+    ('ₐ', 'ₜ'),
+    ('₠', '₿'),
+    ('℀', '↋'),
+    ('←', '␦'),
+    ('⑀', '⑊'),
+    ('①', '⭳'),
+    ('⭶', '⮕'),
+    ('\u{2b97}', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('⳹', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', '⵰'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('⸀', '\u{2e52}'),
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('⿰', '⿻'),
+    ('\u{3000}', '〩'),
+    ('〰', '〿'),
+    ('ぁ', 'ゖ'),
+    ('゛', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('㆐', '㇣'),
+    ('ㇰ', '㈞'),
+    ('㈠', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('꒐', '꓆'),
+    ('ꓐ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('꙳', '꙳'),
+    ('꙾', 'ꚝ'),
+    ('ꚠ', 'ꛯ'),
+    ('꛲', '꛷'),
+    ('꜀', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠤ'),
+    ('ꠧ', '꠫'),
+    ('꠰', '꠹'),
+    ('ꡀ', '꡷'),
+    ('ꢀ', 'ꣃ'),
+    ('꣎', '꣙'),
+    ('ꣲ', 'ꣾ'),
+    ('꤀', 'ꤥ'),
+    ('꤮', 'ꥆ'),
+    ('ꥒ', '꥓'),
+    ('꥟', 'ꥼ'),
+    ('ꦃ', 'ꦲ'),
+    ('ꦴ', 'ꦵ'),
+    ('ꦺ', 'ꦻ'),
+    ('ꦾ', '꧍'),
+    ('ꧏ', '꧙'),
+    ('꧞', 'ꧤ'),
+    ('ꧦ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꨯ', 'ꨰ'),
+    ('ꨳ', 'ꨴ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩍ', 'ꩍ'),
+    ('꩐', '꩙'),
+    ('꩜', 'ꩻ'),
+    ('ꩽ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫫ'),
+    ('ꫮ', 'ꫵ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', '\u{ab6b}'),
+    ('ꭰ', 'ꯤ'),
+    ('ꯦ', 'ꯧ'),
+    ('ꯩ', '꯬'),
+    ('꯰', '꯹'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', '﯁'),
+    ('ﯓ', '﴿'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', '﷽'),
+    ('︐', '︙'),
+    ('︰', '﹒'),
+    ('﹔', '﹦'),
+    ('﹨', '﹫'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('!', 'ン'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('¢', '₩'),
+    ('│', '○'),
+    ('', '�'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐄀', '𐄂'),
+    ('𐄇', '𐄳'),
+    ('𐄷', '𐆎'),
+    ('𐆐', '\u{1019c}'),
+    ('𐆠', '𐆠'),
+    ('𐇐', '𐇼'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐋡', '𐋻'),
+    ('𐌀', '𐌣'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎟', '𐏃'),
+    ('𐏈', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒠', '𐒩'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐕯', '𐕯'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡗', '𐢞'),
+    ('𐢧', '𐢯'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐣻', '𐤛'),
+    ('𐤟', '𐤹'),
+    ('𐤿', '𐤿'),
+    ('𐦀', '𐦷'),
+    ('𐦼', '𐧏'),
+    ('𐧒', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩀', '𐩈'),
+    ('𐩐', '𐩘'),
+    ('𐩠', '𐪟'),
+    ('𐫀', '𐫤'),
+    ('𐫫', '𐫶'),
+    ('𐬀', '𐬵'),
+    ('𐬹', '𐭕'),
+    ('𐭘', '𐭲'),
+    ('𐭸', '𐮑'),
+    ('𐮙', '𐮜'),
+    ('𐮩', '𐮯'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐳺', '𐴣'),
+    ('𐴰', '𐴹'),
+    ('𐹠', '𐹾'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10ead}', '\u{10ead}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('𐽑', '𐽙'),
+    ('\u{10fb0}', '\u{10fcb}'),
+    ('𐿠', '𐿶'),
+    ('𑀀', '𑀀'),
+    ('𑀂', '𑀷'),
+    ('𑁇', '𑁍'),
+    ('𑁒', '𑁯'),
+    ('𑂂', '𑂲'),
+    ('𑂷', '𑂸'),
+    ('𑂻', '𑂼'),
+    ('𑂾', '𑃁'),
+    ('𑃐', '𑃨'),
+    ('𑃰', '𑃹'),
+    ('𑄃', '𑄦'),
+    ('𑄬', '𑄬'),
+    ('𑄶', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅴', '𑅶'),
+    ('𑆂', '𑆵'),
+    ('𑆿', '𑇈'),
+    ('𑇍', '\u{111ce}'),
+    ('𑇐', '𑇟'),
+    ('𑇡', '𑇴'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈮'),
+    ('𑈲', '𑈳'),
+    ('𑈵', '𑈵'),
+    ('𑈸', '𑈽'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊩'),
+    ('𑊰', '𑋞'),
+    ('𑋠', '𑋢'),
+    ('𑋰', '𑋹'),
+    ('𑌂', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑌿', '𑌿'),
+    ('𑍁', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍣'),
+    ('𑐀', '𑐷'),
+    ('𑑀', '𑑁'),
+    ('𑑅', '𑑅'),
+    ('𑑇', '𑑛'),
+    ('𑑝', '𑑝'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑒱', '𑒲'),
+    ('𑒹', '𑒹'),
+    ('𑒻', '𑒼'),
+    ('𑒾', '𑒾'),
+    ('𑓁', '𑓁'),
+    ('𑓄', '𑓇'),
+    ('𑓐', '𑓙'),
+    ('𑖀', '𑖮'),
+    ('𑖰', '𑖱'),
+    ('𑖸', '𑖻'),
+    ('𑖾', '𑖾'),
+    ('𑗁', '𑗛'),
+    ('𑘀', '𑘲'),
+    ('𑘻', '𑘼'),
+    ('𑘾', '𑘾'),
+    ('𑙁', '𑙄'),
+    ('𑙐', '𑙙'),
+    ('𑙠', '𑙬'),
+    ('𑚀', '𑚪'),
+    ('𑚬', '𑚬'),
+    ('𑚮', '𑚯'),
+    ('𑚶', '𑚶'),
+    ('𑚸', '𑚸'),
+    ('𑛀', '𑛉'),
+    ('𑜀', '𑜚'),
+    ('𑜠', '𑜡'),
+    ('𑜦', '𑜦'),
+    ('𑜰', '𑜿'),
+    ('𑠀', '𑠮'),
+    ('𑠸', '𑠸'),
+    ('𑠻', '𑠻'),
+    ('𑢠', '𑣲'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{11931}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193d}', '\u{1193d}'),
+    ('\u{1193f}', '\u{11942}'),
+    ('\u{11944}', '\u{11946}'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧓'),
+    ('𑧜', '𑧟'),
+    ('𑧡', '𑧤'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨹', '𑨺'),
+    ('𑨿', '𑩆'),
+    ('𑩐', '𑩐'),
+    ('𑩗', '𑩘'),
+    ('𑩜', '𑪉'),
+    ('𑪗', '𑪗'),
+    ('𑪚', '𑪢'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰯'),
+    ('𑰾', '𑰾'),
+    ('𑱀', '𑱅'),
+    ('𑱐', '𑱬'),
+    ('𑱰', '𑲏'),
+    ('𑲩', '𑲩'),
+    ('𑲱', '𑲱'),
+    ('𑲴', '𑲴'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵐', '𑵙'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('𑶓', '𑶔'),
+    ('𑶖', '𑶖'),
+    ('𑶘', '𑶘'),
+    ('𑶠', '𑶩'),
+    ('𑻠', '𑻲'),
+    ('𑻵', '𑻸'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𑿀', '𑿱'),
+    ('𑿿', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒑰', '𒑴'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖩠', '𖩩'),
+    ('𖩮', '𖩯'),
+    ('𖫐', '𖫭'),
+    ('𖫵', '𖫵'),
+    ('𖬀', '𖬯'),
+    ('𖬷', '𖭅'),
+    ('𖭐', '𖭙'),
+    ('𖭛', '𖭡'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖺚'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖾇'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿣'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𛲜', '𛲜'),
+    ('𛲟', '𛲟'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄩', '𝅘𝅥𝅲'),
+    ('𝅦', '𝅦'),
+    ('𝅪', '𝅭'),
+    ('𝆃', '𝆄'),
+    ('𝆌', '𝆩'),
+    ('𝆮', '𝇨'),
+    ('𝈀', '𝉁'),
+    ('𝉅', '𝉅'),
+    ('𝋠', '𝋳'),
+    ('𝌀', '𝍖'),
+    ('𝍠', '𝍸'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝟋'),
+    ('𝟎', '𝧿'),
+    ('𝨷', '𝨺'),
+    ('𝩭', '𝩴'),
+    ('𝩶', '𝪃'),
+    ('𝪅', '𝪋'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅀', '𞅉'),
+    ('𞅎', '𞅏'),
+    ('𞋀', '𞋫'),
+    ('𞋰', '𞋹'),
+    ('𞋿', '𞋿'),
+    ('𞠀', '𞣄'),
+    ('𞣇', '𞣏'),
+    ('𞤀', '𞥃'),
+    ('𞥋', '𞥋'),
+    ('𞥐', '𞥙'),
+    ('𞥞', '𞥟'),
+    ('𞱱', '𞲴'),
+    ('𞴁', '𞴽'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𞻰', '𞻱'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+    ('🂠', '🂮'),
+    ('🂱', '🂿'),
+    ('🃁', '🃏'),
+    ('🃑', '🃵'),
+    ('🄀', '\u{1f1ad}'),
+    ('🇦', '🈂'),
+    ('🈐', '🈻'),
+    ('🉀', '🉈'),
+    ('🉐', '🉑'),
+    ('🉠', '🉥'),
+    ('🌀', '\u{1f6d7}'),
+    ('🛠', '🛬'),
+    ('🛰', '\u{1f6fc}'),
+    ('🜀', '🝳'),
+    ('🞀', '🟘'),
+    ('🟠', '🟫'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('🤀', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🩓'),
+    ('🩠', '🩭'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{489}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{902}'),
+    ('\u{93a}', '\u{93a}'),
+    ('\u{93c}', '\u{93c}'),
+    ('\u{941}', '\u{948}'),
+    ('\u{94d}', '\u{94d}'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', '\u{981}'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9be}'),
+    ('\u{9c1}', '\u{9c4}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', '\u{a02}'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('\u{a41}', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', '\u{a82}'),
+    ('\u{abc}', '\u{abc}'),
+    ('\u{ac1}', '\u{ac5}'),
+    ('\u{ac7}', '\u{ac8}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', '\u{b01}'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3e}', '\u{b3f}'),
+    ('\u{b41}', '\u{b44}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', '\u{bbe}'),
+    ('\u{bc0}', '\u{bc0}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', '\u{c00}'),
+    ('\u{c04}', '\u{c04}'),
+    ('\u{c3e}', '\u{c40}'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', '\u{c81}'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('\u{cbf}', '\u{cbf}'),
+    ('\u{cc2}', '\u{cc2}'),
+    ('\u{cc6}', '\u{cc6}'),
+    ('\u{ccc}', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', '\u{d01}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d3e}', '\u{d3e}'),
+    ('\u{d41}', '\u{d44}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', '\u{d81}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dcf}'),
+    ('\u{dd2}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('\u{ddf}', '\u{ddf}'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('\u{f71}', '\u{f7e}'),
+    ('\u{f80}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('\u{102d}', '\u{1030}'),
+    ('\u{1032}', '\u{1037}'),
+    ('\u{1039}', '\u{103a}'),
+    ('\u{103d}', '\u{103e}'),
+    ('\u{1058}', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{1082}'),
+    ('\u{1085}', '\u{1086}'),
+    ('\u{108d}', '\u{108d}'),
+    ('\u{109d}', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{17b7}', '\u{17bd}'),
+    ('\u{17c6}', '\u{17c6}'),
+    ('\u{17c9}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', '\u{1922}'),
+    ('\u{1927}', '\u{1928}'),
+    ('\u{1932}', '\u{1932}'),
+    ('\u{1939}', '\u{193b}'),
+    ('\u{1a17}', '\u{1a18}'),
+    ('\u{1a1b}', '\u{1a1b}'),
+    ('\u{1a56}', '\u{1a56}'),
+    ('\u{1a58}', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a60}'),
+    ('\u{1a62}', '\u{1a62}'),
+    ('\u{1a65}', '\u{1a6c}'),
+    ('\u{1a73}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', '\u{1b03}'),
+    ('\u{1b34}', '\u{1b3a}'),
+    ('\u{1b3c}', '\u{1b3c}'),
+    ('\u{1b42}', '\u{1b42}'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '\u{1b81}'),
+    ('\u{1ba2}', '\u{1ba5}'),
+    ('\u{1ba8}', '\u{1ba9}'),
+    ('\u{1bab}', '\u{1bad}'),
+    ('\u{1be6}', '\u{1be6}'),
+    ('\u{1be8}', '\u{1be9}'),
+    ('\u{1bed}', '\u{1bed}'),
+    ('\u{1bef}', '\u{1bf1}'),
+    ('\u{1c2c}', '\u{1c33}'),
+    ('\u{1c36}', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce0}'),
+    ('\u{1ce2}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200c}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('\u{a825}', '\u{a826}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{a8c4}', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '\u{a951}'),
+    ('\u{a980}', '\u{a982}'),
+    ('\u{a9b3}', '\u{a9b3}'),
+    ('\u{a9b6}', '\u{a9b9}'),
+    ('\u{a9bc}', '\u{a9bd}'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa2e}'),
+    ('\u{aa31}', '\u{aa32}'),
+    ('\u{aa35}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', '\u{aa4c}'),
+    ('\u{aa7c}', '\u{aa7c}'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('\u{aaec}', '\u{aaed}'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('\u{abe5}', '\u{abe5}'),
+    ('\u{abe8}', '\u{abe8}'),
+    ('\u{abed}', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('\u{11001}', '\u{11001}'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '\u{11081}'),
+    ('\u{110b3}', '\u{110b6}'),
+    ('\u{110b9}', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{1112b}'),
+    ('\u{1112d}', '\u{11134}'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '\u{11181}'),
+    ('\u{111b6}', '\u{111be}'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111cf}', '\u{111cf}'),
+    ('\u{1122f}', '\u{11231}'),
+    ('\u{11234}', '\u{11234}'),
+    ('\u{11236}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112df}'),
+    ('\u{112e3}', '\u{112ea}'),
+    ('\u{11300}', '\u{11301}'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{1133e}', '\u{1133e}'),
+    ('\u{11340}', '\u{11340}'),
+    ('\u{11357}', '\u{11357}'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('\u{11438}', '\u{1143f}'),
+    ('\u{11442}', '\u{11444}'),
+    ('\u{11446}', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b0}', '\u{114b0}'),
+    ('\u{114b3}', '\u{114b8}'),
+    ('\u{114ba}', '\u{114ba}'),
+    ('\u{114bd}', '\u{114bd}'),
+    ('\u{114bf}', '\u{114c0}'),
+    ('\u{114c2}', '\u{114c3}'),
+    ('\u{115af}', '\u{115af}'),
+    ('\u{115b2}', '\u{115b5}'),
+    ('\u{115bc}', '\u{115bd}'),
+    ('\u{115bf}', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('\u{11633}', '\u{1163a}'),
+    ('\u{1163d}', '\u{1163d}'),
+    ('\u{1163f}', '\u{11640}'),
+    ('\u{116ab}', '\u{116ab}'),
+    ('\u{116ad}', '\u{116ad}'),
+    ('\u{116b0}', '\u{116b5}'),
+    ('\u{116b7}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1171f}'),
+    ('\u{11722}', '\u{11725}'),
+    ('\u{11727}', '\u{1172b}'),
+    ('\u{1182f}', '\u{11837}'),
+    ('\u{11839}', '\u{1183a}'),
+    ('\u{11930}', '\u{11930}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{1193e}', '\u{1193e}'),
+    ('\u{11943}', '\u{11943}'),
+    ('\u{119d4}', '\u{119d7}'),
+    ('\u{119da}', '\u{119db}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '\u{11a38}'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a56}'),
+    ('\u{11a59}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a96}'),
+    ('\u{11a98}', '\u{11a99}'),
+    ('\u{11c30}', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3d}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('\u{11caa}', '\u{11cb0}'),
+    ('\u{11cb2}', '\u{11cb3}'),
+    ('\u{11cb5}', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('\u{11d95}', '\u{11d95}'),
+    ('\u{11d97}', '\u{11d97}'),
+    ('\u{11ef3}', '\u{11ef4}'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d165}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d16e}', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('\u{e0020}', '\u{e007f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const GRAPHEME_LINK: &'static [(char, char)] = &[
+    ('\u{94d}', '\u{94d}'),
+    ('\u{9cd}', '\u{9cd}'),
+    ('\u{a4d}', '\u{a4d}'),
+    ('\u{acd}', '\u{acd}'),
+    ('\u{b4d}', '\u{b4d}'),
+    ('\u{bcd}', '\u{bcd}'),
+    ('\u{c4d}', '\u{c4d}'),
+    ('\u{ccd}', '\u{ccd}'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d4d}', '\u{d4d}'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{e3a}', '\u{e3a}'),
+    ('\u{eba}', '\u{eba}'),
+    ('\u{f84}', '\u{f84}'),
+    ('\u{1039}', '\u{103a}'),
+    ('\u{1714}', '\u{1714}'),
+    ('\u{1734}', '\u{1734}'),
+    ('\u{17d2}', '\u{17d2}'),
+    ('\u{1a60}', '\u{1a60}'),
+    ('᭄', '᭄'),
+    ('᮪', '\u{1bab}'),
+    ('᯲', '᯳'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('\u{a8c4}', '\u{a8c4}'),
+    ('꥓', '꥓'),
+    ('꧀', '꧀'),
+    ('\u{aaf6}', '\u{aaf6}'),
+    ('\u{abed}', '\u{abed}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{11046}', '\u{11046}'),
+    ('\u{1107f}', '\u{1107f}'),
+    ('\u{110b9}', '\u{110b9}'),
+    ('\u{11133}', '\u{11134}'),
+    ('𑇀', '𑇀'),
+    ('𑈵', '𑈵'),
+    ('\u{112ea}', '\u{112ea}'),
+    ('𑍍', '𑍍'),
+    ('\u{11442}', '\u{11442}'),
+    ('\u{114c2}', '\u{114c2}'),
+    ('\u{115bf}', '\u{115bf}'),
+    ('\u{1163f}', '\u{1163f}'),
+    ('𑚶', '𑚶'),
+    ('\u{1172b}', '\u{1172b}'),
+    ('\u{11839}', '\u{11839}'),
+    ('\u{1193d}', '\u{1193e}'),
+    ('\u{119e0}', '\u{119e0}'),
+    ('\u{11a34}', '\u{11a34}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a99}', '\u{11a99}'),
+    ('\u{11c3f}', '\u{11c3f}'),
+    ('\u{11d44}', '\u{11d45}'),
+    ('\u{11d97}', '\u{11d97}'),
+];
+
+pub const HEX_DIGIT: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('A', 'F'),
+    ('a', 'f'),
+    ('\u{ff10}', '\u{ff19}'),
+    ('\u{ff21}', '\u{ff26}'),
+    ('\u{ff41}', '\u{ff46}'),
+];
+
+pub const HYPHEN: &'static [(char, char)] = &[
+    ('-', '-'),
+    ('\u{ad}', '\u{ad}'),
+    ('֊', '֊'),
+    ('᠆', '᠆'),
+    ('‐', '‑'),
+    ('⸗', '⸗'),
+    ('・', '・'),
+    ('﹣', '﹣'),
+    ('\u{ff0d}', '\u{ff0d}'),
+    ('\u{ff65}', '\u{ff65}'),
+];
+
+pub const IDS_BINARY_OPERATOR: &'static [(char, char)] =
+    &[('⿰', '⿱'), ('⿴', '⿻')];
+
+pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('⿲', '⿳')];
+
+pub const ID_CONTINUE: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('A', 'Z'),
+    ('_', '_'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('·', '·'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('\u{300}', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('\u{483}', '\u{487}'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('\u{610}', '\u{61a}'),
+    ('ؠ', '٩'),
+    ('ٮ', 'ۓ'),
+    ('ە', '\u{6dc}'),
+    ('\u{6df}', '\u{6e8}'),
+    ('\u{6ea}', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', '\u{74a}'),
+    ('ݍ', 'ޱ'),
+    ('߀', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('ࠀ', '\u{82d}'),
+    ('ࡀ', '\u{85b}'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{963}'),
+    ('०', '९'),
+    ('ॱ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('૦', '૯'),
+    ('ૹ', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('\u{b3c}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('୦', '୯'),
+    ('ୱ', 'ୱ'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௦', '௯'),
+    ('\u{c00}', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('౦', '౯'),
+    ('ಀ', 'ಃ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('\u{cbc}', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൎ'),
+    ('ൔ', '\u{d57}'),
+    ('ൟ', '\u{d63}'),
+    ('൦', '൯'),
+    ('ൺ', 'ൿ'),
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('෦', '෯'),
+    ('ෲ', 'ෳ'),
+    ('ก', '\u{e3a}'),
+    ('เ', '\u{e4e}'),
+    ('๐', '๙'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('\u{f18}', '\u{f19}'),
+    ('༠', '༩'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('က', '၉'),
+    ('ၐ', '\u{109d}'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('\u{135d}', '\u{135f}'),
+    ('፩', '፱'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', '\u{1714}'),
+    ('ᜠ', '\u{1734}'),
+    ('ᝀ', '\u{1753}'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('\u{1772}', '\u{1773}'),
+    ('ក', '\u{17d3}'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', '\u{17dd}'),
+    ('០', '៩'),
+    ('\u{180b}', '\u{180d}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥆', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('᧐', '᧚'),
+    ('ᨀ', '\u{1a1b}'),
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1ab0}', '\u{1abd}'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᭋ'),
+    ('᭐', '᭙'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '᯳'),
+    ('ᰀ', '\u{1c37}'),
+    ('᱀', '᱉'),
+    ('ᱍ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', 'ᳺ'),
+    ('ᴀ', '\u{1df9}'),
+    ('\u{1dfb}', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('‿', '⁀'),
+    ('⁔', '⁔'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('\u{20d0}', '\u{20dc}'),
+    ('\u{20e1}', '\u{20e1}'),
+    ('\u{20e5}', '\u{20f0}'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('℘', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('\u{2d7f}', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('々', '〇'),
+    ('〡', '\u{302f}'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('\u{3099}', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘫ'),
+    ('Ꙁ', '\u{a66f}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('ꙿ', '\u{a6f1}'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢀ', '\u{a8c5}'),
+    ('꣐', '꣙'),
+    ('\u{a8e0}', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', '\u{a92d}'),
+    ('ꤰ', '꥓'),
+    ('ꥠ', 'ꥼ'),
+    ('\u{a980}', '꧀'),
+    ('ꧏ', '꧙'),
+    ('ꧠ', 'ꧾ'),
+    ('ꨀ', '\u{aa36}'),
+    ('ꩀ', 'ꩍ'),
+    ('꩐', '꩙'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫯ'),
+    ('ꫲ', '\u{aaf6}'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('꯰', '꯹'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('︳', '︴'),
+    ('﹍', '﹏'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('\u{ff10}', '\u{ff19}'),
+    ('\u{ff21}', '\u{ff3a}'),
+    ('\u{ff3f}', '\u{ff3f}'),
+    ('\u{ff41}', '\u{ff5a}'),
+    ('\u{ff66}', '\u{ffbe}'),
+    ('\u{ffc2}', '\u{ffc7}'),
+    ('\u{ffca}', '\u{ffcf}'),
+    ('\u{ffd2}', '\u{ffd7}'),
+    ('\u{ffda}', '\u{ffdc}'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '\u{1037a}'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒠', '𐒩'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '\u{10ae6}'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '\u{10d27}'),
+    ('𐴰', '𐴹'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '\u{10f50}'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀀', '\u{11046}'),
+    ('𑁦', '𑁯'),
+    ('\u{1107f}', '\u{110ba}'),
+    ('𑃐', '𑃨'),
+    ('𑃰', '𑃹'),
+    ('\u{11100}', '\u{11134}'),
+    ('𑄶', '𑄿'),
+    ('𑅄', '\u{11147}'),
+    ('𑅐', '\u{11173}'),
+    ('𑅶', '𑅶'),
+    ('\u{11180}', '𑇄'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '\u{112ea}'),
+    ('𑋰', '𑋹'),
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133b}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐀', '𑑊'),
+    ('𑑐', '𑑙'),
+    ('\u{1145e}', '\u{11461}'),
+    ('𑒀', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑓐', '𑓙'),
+    ('𑖀', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('𑗘', '\u{115dd}'),
+    ('𑘀', '\u{11640}'),
+    ('𑙄', '𑙄'),
+    ('𑙐', '𑙙'),
+    ('𑚀', '𑚸'),
+    ('𑛀', '𑛉'),
+    ('𑜀', '𑜚'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑜰', '𑜹'),
+    ('𑠀', '\u{1183a}'),
+    ('𑢠', '𑣩'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11943}'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧡'),
+    ('𑧣', '𑧤'),
+    ('𑨀', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('𑩐', '\u{11a99}'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '\u{11c36}'),
+    ('\u{11c38}', '𑱀'),
+    ('𑱐', '𑱙'),
+    ('𑱲', '𑲏'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+    ('𑻠', '𑻶'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖩠', '𖩩'),
+    ('𖫐', '𖫭'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('𖬀', '\u{16b36}'),
+    ('𖭀', '𖭃'),
+    ('𖭐', '𖭙'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('\u{16f4f}', '𖾇'),
+    ('\u{16f8f}', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('𞄀', '𞄬'),
+    ('\u{1e130}', '𞄽'),
+    ('𞅀', '𞅉'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋹'),
+    ('𞠀', '𞣄'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('𞤀', '𞥋'),
+    ('𞥐', '𞥙'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const ID_START: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('Ͱ', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('ؠ', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۥ', 'ۦ'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॱ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ก', 'ะ'),
+    ('า', 'ำ'),
+    ('เ', 'ๆ'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'ຳ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('က', 'ဪ'),
+    ('ဿ', 'ဿ'),
+    ('ၐ', 'ၕ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၡ'),
+    ('ၥ', 'ၦ'),
+    ('ၮ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႎ', 'ႎ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᨠ', 'ᩔ'),
+    ('ᪧ', 'ᪧ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('℘', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('々', '〇'),
+    ('〡', '〩'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('゛', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('ꙿ', 'ꚝ'),
+    ('ꚠ', 'ꛯ'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧠ', 'ꧤ'),
+    ('ꧦ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꩺ'),
+    ('ꩾ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫴ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('\u{ff21}', '\u{ff3a}'),
+    ('\u{ff41}', '\u{ff5a}'),
+    ('\u{ff66}', '\u{ffbe}'),
+    ('\u{ffc2}', '\u{ffc7}'),
+    ('\u{ffca}', '\u{ffcf}'),
+    ('\u{ffd2}', '\u{ffd7}'),
+    ('\u{ffda}', '\u{ffdc}'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('𑠀', '𑠫'),
+    ('𑢠', '𑣟'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞤀', '𞥃'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const IDEOGRAPHIC: &'static [(char, char)] = &[
+    ('〆', '〇'),
+    ('〡', '〩'),
+    ('〸', '〺'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛅰', '𛋻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')];
+
+pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[
+    ('เ', 'ไ'),
+    ('ເ', 'ໄ'),
+    ('ᦵ', 'ᦷ'),
+    ('ᦺ', 'ᦺ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪹ'),
+    ('ꪻ', 'ꪼ'),
+];
+
+pub const LOWERCASE: &'static [(char, char)] = &[
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('ß', 'ö'),
+    ('ø', 'ÿ'),
+    ('ā', 'ā'),
+    ('ă', 'ă'),
+    ('ą', 'ą'),
+    ('ć', 'ć'),
+    ('ĉ', 'ĉ'),
+    ('ċ', 'ċ'),
+    ('č', 'č'),
+    ('ď', 'ď'),
+    ('đ', 'đ'),
+    ('ē', 'ē'),
+    ('ĕ', 'ĕ'),
+    ('ė', 'ė'),
+    ('ę', 'ę'),
+    ('ě', 'ě'),
+    ('ĝ', 'ĝ'),
+    ('ğ', 'ğ'),
+    ('ġ', 'ġ'),
+    ('ģ', 'ģ'),
+    ('ĥ', 'ĥ'),
+    ('ħ', 'ħ'),
+    ('ĩ', 'ĩ'),
+    ('ī', 'ī'),
+    ('ĭ', 'ĭ'),
+    ('į', 'į'),
+    ('ı', 'ı'),
+    ('ij', 'ij'),
+    ('ĵ', 'ĵ'),
+    ('ķ', 'ĸ'),
+    ('ĺ', 'ĺ'),
+    ('ļ', 'ļ'),
+    ('ľ', 'ľ'),
+    ('ŀ', 'ŀ'),
+    ('ł', 'ł'),
+    ('ń', 'ń'),
+    ('ņ', 'ņ'),
+    ('ň', 'ʼn'),
+    ('ŋ', 'ŋ'),
+    ('ō', 'ō'),
+    ('ŏ', 'ŏ'),
+    ('ő', 'ő'),
+    ('œ', 'œ'),
+    ('ŕ', 'ŕ'),
+    ('ŗ', 'ŗ'),
+    ('ř', 'ř'),
+    ('ś', 'ś'),
+    ('ŝ', 'ŝ'),
+    ('ş', 'ş'),
+    ('š', 'š'),
+    ('ţ', 'ţ'),
+    ('ť', 'ť'),
+    ('ŧ', 'ŧ'),
+    ('ũ', 'ũ'),
+    ('ū', 'ū'),
+    ('ŭ', 'ŭ'),
+    ('ů', 'ů'),
+    ('ű', 'ű'),
+    ('ų', 'ų'),
+    ('ŵ', 'ŵ'),
+    ('ŷ', 'ŷ'),
+    ('ź', 'ź'),
+    ('ż', 'ż'),
+    ('ž', 'ƀ'),
+    ('ƃ', 'ƃ'),
+    ('ƅ', 'ƅ'),
+    ('ƈ', 'ƈ'),
+    ('ƌ', 'ƍ'),
+    ('ƒ', 'ƒ'),
+    ('ƕ', 'ƕ'),
+    ('ƙ', 'ƛ'),
+    ('ƞ', 'ƞ'),
+    ('ơ', 'ơ'),
+    ('ƣ', 'ƣ'),
+    ('ƥ', 'ƥ'),
+    ('ƨ', 'ƨ'),
+    ('ƪ', 'ƫ'),
+    ('ƭ', 'ƭ'),
+    ('ư', 'ư'),
+    ('ƴ', 'ƴ'),
+    ('ƶ', 'ƶ'),
+    ('ƹ', 'ƺ'),
+    ('ƽ', 'ƿ'),
+    ('dž', 'dž'),
+    ('lj', 'lj'),
+    ('nj', 'nj'),
+    ('ǎ', 'ǎ'),
+    ('ǐ', 'ǐ'),
+    ('ǒ', 'ǒ'),
+    ('ǔ', 'ǔ'),
+    ('ǖ', 'ǖ'),
+    ('ǘ', 'ǘ'),
+    ('ǚ', 'ǚ'),
+    ('ǜ', 'ǝ'),
+    ('ǟ', 'ǟ'),
+    ('ǡ', 'ǡ'),
+    ('ǣ', 'ǣ'),
+    ('ǥ', 'ǥ'),
+    ('ǧ', 'ǧ'),
+    ('ǩ', 'ǩ'),
+    ('ǫ', 'ǫ'),
+    ('ǭ', 'ǭ'),
+    ('ǯ', 'ǰ'),
+    ('dz', 'dz'),
+    ('ǵ', 'ǵ'),
+    ('ǹ', 'ǹ'),
+    ('ǻ', 'ǻ'),
+    ('ǽ', 'ǽ'),
+    ('ǿ', 'ǿ'),
+    ('ȁ', 'ȁ'),
+    ('ȃ', 'ȃ'),
+    ('ȅ', 'ȅ'),
+    ('ȇ', 'ȇ'),
+    ('ȉ', 'ȉ'),
+    ('ȋ', 'ȋ'),
+    ('ȍ', 'ȍ'),
+    ('ȏ', 'ȏ'),
+    ('ȑ', 'ȑ'),
+    ('ȓ', 'ȓ'),
+    ('ȕ', 'ȕ'),
+    ('ȗ', 'ȗ'),
+    ('ș', 'ș'),
+    ('ț', 'ț'),
+    ('ȝ', 'ȝ'),
+    ('ȟ', 'ȟ'),
+    ('ȡ', 'ȡ'),
+    ('ȣ', 'ȣ'),
+    ('ȥ', 'ȥ'),
+    ('ȧ', 'ȧ'),
+    ('ȩ', 'ȩ'),
+    ('ȫ', 'ȫ'),
+    ('ȭ', 'ȭ'),
+    ('ȯ', 'ȯ'),
+    ('ȱ', 'ȱ'),
+    ('ȳ', 'ȹ'),
+    ('ȼ', 'ȼ'),
+    ('ȿ', 'ɀ'),
+    ('ɂ', 'ɂ'),
+    ('ɇ', 'ɇ'),
+    ('ɉ', 'ɉ'),
+    ('ɋ', 'ɋ'),
+    ('ɍ', 'ɍ'),
+    ('ɏ', 'ʓ'),
+    ('ʕ', 'ʸ'),
+    ('ˀ', 'ˁ'),
+    ('ˠ', 'ˤ'),
+    ('\u{345}', '\u{345}'),
+    ('ͱ', 'ͱ'),
+    ('ͳ', 'ͳ'),
+    ('ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('ΐ', 'ΐ'),
+    ('ά', 'ώ'),
+    ('ϐ', 'ϑ'),
+    ('ϕ', 'ϗ'),
+    ('ϙ', 'ϙ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('ϣ', 'ϣ'),
+    ('ϥ', 'ϥ'),
+    ('ϧ', 'ϧ'),
+    ('ϩ', 'ϩ'),
+    ('ϫ', 'ϫ'),
+    ('ϭ', 'ϭ'),
+    ('ϯ', 'ϳ'),
+    ('ϵ', 'ϵ'),
+    ('ϸ', 'ϸ'),
+    ('ϻ', 'ϼ'),
+    ('а', 'џ'),
+    ('ѡ', 'ѡ'),
+    ('ѣ', 'ѣ'),
+    ('ѥ', 'ѥ'),
+    ('ѧ', 'ѧ'),
+    ('ѩ', 'ѩ'),
+    ('ѫ', 'ѫ'),
+    ('ѭ', 'ѭ'),
+    ('ѯ', 'ѯ'),
+    ('ѱ', 'ѱ'),
+    ('ѳ', 'ѳ'),
+    ('ѵ', 'ѵ'),
+    ('ѷ', 'ѷ'),
+    ('ѹ', 'ѹ'),
+    ('ѻ', 'ѻ'),
+    ('ѽ', 'ѽ'),
+    ('ѿ', 'ѿ'),
+    ('ҁ', 'ҁ'),
+    ('ҋ', 'ҋ'),
+    ('ҍ', 'ҍ'),
+    ('ҏ', 'ҏ'),
+    ('ґ', 'ґ'),
+    ('ғ', 'ғ'),
+    ('ҕ', 'ҕ'),
+    ('җ', 'җ'),
+    ('ҙ', 'ҙ'),
+    ('қ', 'қ'),
+    ('ҝ', 'ҝ'),
+    ('ҟ', 'ҟ'),
+    ('ҡ', 'ҡ'),
+    ('ң', 'ң'),
+    ('ҥ', 'ҥ'),
+    ('ҧ', 'ҧ'),
+    ('ҩ', 'ҩ'),
+    ('ҫ', 'ҫ'),
+    ('ҭ', 'ҭ'),
+    ('ү', 'ү'),
+    ('ұ', 'ұ'),
+    ('ҳ', 'ҳ'),
+    ('ҵ', 'ҵ'),
+    ('ҷ', 'ҷ'),
+    ('ҹ', 'ҹ'),
+    ('һ', 'һ'),
+    ('ҽ', 'ҽ'),
+    ('ҿ', 'ҿ'),
+    ('ӂ', 'ӂ'),
+    ('ӄ', 'ӄ'),
+    ('ӆ', 'ӆ'),
+    ('ӈ', 'ӈ'),
+    ('ӊ', 'ӊ'),
+    ('ӌ', 'ӌ'),
+    ('ӎ', 'ӏ'),
+    ('ӑ', 'ӑ'),
+    ('ӓ', 'ӓ'),
+    ('ӕ', 'ӕ'),
+    ('ӗ', 'ӗ'),
+    ('ә', 'ә'),
+    ('ӛ', 'ӛ'),
+    ('ӝ', 'ӝ'),
+    ('ӟ', 'ӟ'),
+    ('ӡ', 'ӡ'),
+    ('ӣ', 'ӣ'),
+    ('ӥ', 'ӥ'),
+    ('ӧ', 'ӧ'),
+    ('ө', 'ө'),
+    ('ӫ', 'ӫ'),
+    ('ӭ', 'ӭ'),
+    ('ӯ', 'ӯ'),
+    ('ӱ', 'ӱ'),
+    ('ӳ', 'ӳ'),
+    ('ӵ', 'ӵ'),
+    ('ӷ', 'ӷ'),
+    ('ӹ', 'ӹ'),
+    ('ӻ', 'ӻ'),
+    ('ӽ', 'ӽ'),
+    ('ӿ', 'ӿ'),
+    ('ԁ', 'ԁ'),
+    ('ԃ', 'ԃ'),
+    ('ԅ', 'ԅ'),
+    ('ԇ', 'ԇ'),
+    ('ԉ', 'ԉ'),
+    ('ԋ', 'ԋ'),
+    ('ԍ', 'ԍ'),
+    ('ԏ', 'ԏ'),
+    ('ԑ', 'ԑ'),
+    ('ԓ', 'ԓ'),
+    ('ԕ', 'ԕ'),
+    ('ԗ', 'ԗ'),
+    ('ԙ', 'ԙ'),
+    ('ԛ', 'ԛ'),
+    ('ԝ', 'ԝ'),
+    ('ԟ', 'ԟ'),
+    ('ԡ', 'ԡ'),
+    ('ԣ', 'ԣ'),
+    ('ԥ', 'ԥ'),
+    ('ԧ', 'ԧ'),
+    ('ԩ', 'ԩ'),
+    ('ԫ', 'ԫ'),
+    ('ԭ', 'ԭ'),
+    ('ԯ', 'ԯ'),
+    ('ՠ', 'ֈ'),
+    ('ა', 'ჺ'),
+    ('ჽ', 'ჿ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᴀ', 'ᶿ'),
+    ('ḁ', 'ḁ'),
+    ('ḃ', 'ḃ'),
+    ('ḅ', 'ḅ'),
+    ('ḇ', 'ḇ'),
+    ('ḉ', 'ḉ'),
+    ('ḋ', 'ḋ'),
+    ('ḍ', 'ḍ'),
+    ('ḏ', 'ḏ'),
+    ('ḑ', 'ḑ'),
+    ('ḓ', 'ḓ'),
+    ('ḕ', 'ḕ'),
+    ('ḗ', 'ḗ'),
+    ('ḙ', 'ḙ'),
+    ('ḛ', 'ḛ'),
+    ('ḝ', 'ḝ'),
+    ('ḟ', 'ḟ'),
+    ('ḡ', 'ḡ'),
+    ('ḣ', 'ḣ'),
+    ('ḥ', 'ḥ'),
+    ('ḧ', 'ḧ'),
+    ('ḩ', 'ḩ'),
+    ('ḫ', 'ḫ'),
+    ('ḭ', 'ḭ'),
+    ('ḯ', 'ḯ'),
+    ('ḱ', 'ḱ'),
+    ('ḳ', 'ḳ'),
+    ('ḵ', 'ḵ'),
+    ('ḷ', 'ḷ'),
+    ('ḹ', 'ḹ'),
+    ('ḻ', 'ḻ'),
+    ('ḽ', 'ḽ'),
+    ('ḿ', 'ḿ'),
+    ('ṁ', 'ṁ'),
+    ('ṃ', 'ṃ'),
+    ('ṅ', 'ṅ'),
+    ('ṇ', 'ṇ'),
+    ('ṉ', 'ṉ'),
+    ('ṋ', 'ṋ'),
+    ('ṍ', 'ṍ'),
+    ('ṏ', 'ṏ'),
+    ('ṑ', 'ṑ'),
+    ('ṓ', 'ṓ'),
+    ('ṕ', 'ṕ'),
+    ('ṗ', 'ṗ'),
+    ('ṙ', 'ṙ'),
+    ('ṛ', 'ṛ'),
+    ('ṝ', 'ṝ'),
+    ('ṟ', 'ṟ'),
+    ('ṡ', 'ṡ'),
+    ('ṣ', 'ṣ'),
+    ('ṥ', 'ṥ'),
+    ('ṧ', 'ṧ'),
+    ('ṩ', 'ṩ'),
+    ('ṫ', 'ṫ'),
+    ('ṭ', 'ṭ'),
+    ('ṯ', 'ṯ'),
+    ('ṱ', 'ṱ'),
+    ('ṳ', 'ṳ'),
+    ('ṵ', 'ṵ'),
+    ('ṷ', 'ṷ'),
+    ('ṹ', 'ṹ'),
+    ('ṻ', 'ṻ'),
+    ('ṽ', 'ṽ'),
+    ('ṿ', 'ṿ'),
+    ('ẁ', 'ẁ'),
+    ('ẃ', 'ẃ'),
+    ('ẅ', 'ẅ'),
+    ('ẇ', 'ẇ'),
+    ('ẉ', 'ẉ'),
+    ('ẋ', 'ẋ'),
+    ('ẍ', 'ẍ'),
+    ('ẏ', 'ẏ'),
+    ('ẑ', 'ẑ'),
+    ('ẓ', 'ẓ'),
+    ('ẕ', 'ẝ'),
+    ('ẟ', 'ẟ'),
+    ('ạ', 'ạ'),
+    ('ả', 'ả'),
+    ('ấ', 'ấ'),
+    ('ầ', 'ầ'),
+    ('ẩ', 'ẩ'),
+    ('ẫ', 'ẫ'),
+    ('ậ', 'ậ'),
+    ('ắ', 'ắ'),
+    ('ằ', 'ằ'),
+    ('ẳ', 'ẳ'),
+    ('ẵ', 'ẵ'),
+    ('ặ', 'ặ'),
+    ('ẹ', 'ẹ'),
+    ('ẻ', 'ẻ'),
+    ('ẽ', 'ẽ'),
+    ('ế', 'ế'),
+    ('ề', 'ề'),
+    ('ể', 'ể'),
+    ('ễ', 'ễ'),
+    ('ệ', 'ệ'),
+    ('ỉ', 'ỉ'),
+    ('ị', 'ị'),
+    ('ọ', 'ọ'),
+    ('ỏ', 'ỏ'),
+    ('ố', 'ố'),
+    ('ồ', 'ồ'),
+    ('ổ', 'ổ'),
+    ('ỗ', 'ỗ'),
+    ('ộ', 'ộ'),
+    ('ớ', 'ớ'),
+    ('ờ', 'ờ'),
+    ('ở', 'ở'),
+    ('ỡ', 'ỡ'),
+    ('ợ', 'ợ'),
+    ('ụ', 'ụ'),
+    ('ủ', 'ủ'),
+    ('ứ', 'ứ'),
+    ('ừ', 'ừ'),
+    ('ử', 'ử'),
+    ('ữ', 'ữ'),
+    ('ự', 'ự'),
+    ('ỳ', 'ỳ'),
+    ('ỵ', 'ỵ'),
+    ('ỷ', 'ỷ'),
+    ('ỹ', 'ỹ'),
+    ('ỻ', 'ỻ'),
+    ('ỽ', 'ỽ'),
+    ('ỿ', 'ἇ'),
+    ('ἐ', 'ἕ'),
+    ('ἠ', 'ἧ'),
+    ('ἰ', 'ἷ'),
+    ('ὀ', 'ὅ'),
+    ('ὐ', 'ὗ'),
+    ('ὠ', 'ὧ'),
+    ('ὰ', 'ώ'),
+    ('ᾀ', 'ᾇ'),
+    ('ᾐ', 'ᾗ'),
+    ('ᾠ', 'ᾧ'),
+    ('ᾰ', 'ᾴ'),
+    ('ᾶ', 'ᾷ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῇ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'ῗ'),
+    ('ῠ', 'ῧ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῷ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℊ', 'ℊ'),
+    ('ℎ', 'ℏ'),
+    ('ℓ', 'ℓ'),
+    ('ℯ', 'ℯ'),
+    ('ℴ', 'ℴ'),
+    ('ℹ', 'ℹ'),
+    ('ℼ', 'ℽ'),
+    ('ⅆ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('ⅰ', 'ⅿ'),
+    ('ↄ', 'ↄ'),
+    ('ⓐ', 'ⓩ'),
+    ('ⰰ', 'ⱞ'),
+    ('ⱡ', 'ⱡ'),
+    ('ⱥ', 'ⱦ'),
+    ('ⱨ', 'ⱨ'),
+    ('ⱪ', 'ⱪ'),
+    ('ⱬ', 'ⱬ'),
+    ('ⱱ', 'ⱱ'),
+    ('ⱳ', 'ⱴ'),
+    ('ⱶ', 'ⱽ'),
+    ('ⲁ', 'ⲁ'),
+    ('ⲃ', 'ⲃ'),
+    ('ⲅ', 'ⲅ'),
+    ('ⲇ', 'ⲇ'),
+    ('ⲉ', 'ⲉ'),
+    ('ⲋ', 'ⲋ'),
+    ('ⲍ', 'ⲍ'),
+    ('ⲏ', 'ⲏ'),
+    ('ⲑ', 'ⲑ'),
+    ('ⲓ', 'ⲓ'),
+    ('ⲕ', 'ⲕ'),
+    ('ⲗ', 'ⲗ'),
+    ('ⲙ', 'ⲙ'),
+    ('ⲛ', 'ⲛ'),
+    ('ⲝ', 'ⲝ'),
+    ('ⲟ', 'ⲟ'),
+    ('ⲡ', 'ⲡ'),
+    ('ⲣ', 'ⲣ'),
+    ('ⲥ', 'ⲥ'),
+    ('ⲧ', 'ⲧ'),
+    ('ⲩ', 'ⲩ'),
+    ('ⲫ', 'ⲫ'),
+    ('ⲭ', 'ⲭ'),
+    ('ⲯ', 'ⲯ'),
+    ('ⲱ', 'ⲱ'),
+    ('ⲳ', 'ⲳ'),
+    ('ⲵ', 'ⲵ'),
+    ('ⲷ', 'ⲷ'),
+    ('ⲹ', 'ⲹ'),
+    ('ⲻ', 'ⲻ'),
+    ('ⲽ', 'ⲽ'),
+    ('ⲿ', 'ⲿ'),
+    ('ⳁ', 'ⳁ'),
+    ('ⳃ', 'ⳃ'),
+    ('ⳅ', 'ⳅ'),
+    ('ⳇ', 'ⳇ'),
+    ('ⳉ', 'ⳉ'),
+    ('ⳋ', 'ⳋ'),
+    ('ⳍ', 'ⳍ'),
+    ('ⳏ', 'ⳏ'),
+    ('ⳑ', 'ⳑ'),
+    ('ⳓ', 'ⳓ'),
+    ('ⳕ', 'ⳕ'),
+    ('ⳗ', 'ⳗ'),
+    ('ⳙ', 'ⳙ'),
+    ('ⳛ', 'ⳛ'),
+    ('ⳝ', 'ⳝ'),
+    ('ⳟ', 'ⳟ'),
+    ('ⳡ', 'ⳡ'),
+    ('ⳣ', 'ⳤ'),
+    ('ⳬ', 'ⳬ'),
+    ('ⳮ', 'ⳮ'),
+    ('ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ꙁ', 'ꙁ'),
+    ('ꙃ', 'ꙃ'),
+    ('ꙅ', 'ꙅ'),
+    ('ꙇ', 'ꙇ'),
+    ('ꙉ', 'ꙉ'),
+    ('ꙋ', 'ꙋ'),
+    ('ꙍ', 'ꙍ'),
+    ('ꙏ', 'ꙏ'),
+    ('ꙑ', 'ꙑ'),
+    ('ꙓ', 'ꙓ'),
+    ('ꙕ', 'ꙕ'),
+    ('ꙗ', 'ꙗ'),
+    ('ꙙ', 'ꙙ'),
+    ('ꙛ', 'ꙛ'),
+    ('ꙝ', 'ꙝ'),
+    ('ꙟ', 'ꙟ'),
+    ('ꙡ', 'ꙡ'),
+    ('ꙣ', 'ꙣ'),
+    ('ꙥ', 'ꙥ'),
+    ('ꙧ', 'ꙧ'),
+    ('ꙩ', 'ꙩ'),
+    ('ꙫ', 'ꙫ'),
+    ('ꙭ', 'ꙭ'),
+    ('ꚁ', 'ꚁ'),
+    ('ꚃ', 'ꚃ'),
+    ('ꚅ', 'ꚅ'),
+    ('ꚇ', 'ꚇ'),
+    ('ꚉ', 'ꚉ'),
+    ('ꚋ', 'ꚋ'),
+    ('ꚍ', 'ꚍ'),
+    ('ꚏ', 'ꚏ'),
+    ('ꚑ', 'ꚑ'),
+    ('ꚓ', 'ꚓ'),
+    ('ꚕ', 'ꚕ'),
+    ('ꚗ', 'ꚗ'),
+    ('ꚙ', 'ꚙ'),
+    ('ꚛ', 'ꚝ'),
+    ('ꜣ', 'ꜣ'),
+    ('ꜥ', 'ꜥ'),
+    ('ꜧ', 'ꜧ'),
+    ('ꜩ', 'ꜩ'),
+    ('ꜫ', 'ꜫ'),
+    ('ꜭ', 'ꜭ'),
+    ('ꜯ', 'ꜱ'),
+    ('ꜳ', 'ꜳ'),
+    ('ꜵ', 'ꜵ'),
+    ('ꜷ', 'ꜷ'),
+    ('ꜹ', 'ꜹ'),
+    ('ꜻ', 'ꜻ'),
+    ('ꜽ', 'ꜽ'),
+    ('ꜿ', 'ꜿ'),
+    ('ꝁ', 'ꝁ'),
+    ('ꝃ', 'ꝃ'),
+    ('ꝅ', 'ꝅ'),
+    ('ꝇ', 'ꝇ'),
+    ('ꝉ', 'ꝉ'),
+    ('ꝋ', 'ꝋ'),
+    ('ꝍ', 'ꝍ'),
+    ('ꝏ', 'ꝏ'),
+    ('ꝑ', 'ꝑ'),
+    ('ꝓ', 'ꝓ'),
+    ('ꝕ', 'ꝕ'),
+    ('ꝗ', 'ꝗ'),
+    ('ꝙ', 'ꝙ'),
+    ('ꝛ', 'ꝛ'),
+    ('ꝝ', 'ꝝ'),
+    ('ꝟ', 'ꝟ'),
+    ('ꝡ', 'ꝡ'),
+    ('ꝣ', 'ꝣ'),
+    ('ꝥ', 'ꝥ'),
+    ('ꝧ', 'ꝧ'),
+    ('ꝩ', 'ꝩ'),
+    ('ꝫ', 'ꝫ'),
+    ('ꝭ', 'ꝭ'),
+    ('ꝯ', 'ꝸ'),
+    ('ꝺ', 'ꝺ'),
+    ('ꝼ', 'ꝼ'),
+    ('ꝿ', 'ꝿ'),
+    ('ꞁ', 'ꞁ'),
+    ('ꞃ', 'ꞃ'),
+    ('ꞅ', 'ꞅ'),
+    ('ꞇ', 'ꞇ'),
+    ('ꞌ', 'ꞌ'),
+    ('ꞎ', 'ꞎ'),
+    ('ꞑ', 'ꞑ'),
+    ('ꞓ', 'ꞕ'),
+    ('ꞗ', 'ꞗ'),
+    ('ꞙ', 'ꞙ'),
+    ('ꞛ', 'ꞛ'),
+    ('ꞝ', 'ꞝ'),
+    ('ꞟ', 'ꞟ'),
+    ('ꞡ', 'ꞡ'),
+    ('ꞣ', 'ꞣ'),
+    ('ꞥ', 'ꞥ'),
+    ('ꞧ', 'ꞧ'),
+    ('ꞩ', 'ꞩ'),
+    ('ꞯ', 'ꞯ'),
+    ('ꞵ', 'ꞵ'),
+    ('ꞷ', 'ꞷ'),
+    ('ꞹ', 'ꞹ'),
+    ('ꞻ', 'ꞻ'),
+    ('ꞽ', 'ꞽ'),
+    ('ꞿ', 'ꞿ'),
+    ('ꟃ', 'ꟃ'),
+    ('\u{a7c8}', '\u{a7c8}'),
+    ('\u{a7ca}', '\u{a7ca}'),
+    ('\u{a7f6}', '\u{a7f6}'),
+    ('ꟸ', 'ꟺ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab68}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('\u{ff41}', '\u{ff5a}'),
+    ('𐐨', '𐑏'),
+    ('𐓘', '𐓻'),
+    ('𐳀', '𐳲'),
+    ('𑣀', '𑣟'),
+    ('𖹠', '𖹿'),
+    ('𝐚', '𝐳'),
+    ('𝑎', '𝑔'),
+    ('𝑖', '𝑧'),
+    ('𝒂', '𝒛'),
+    ('𝒶', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝓏'),
+    ('𝓪', '𝔃'),
+    ('𝔞', '𝔷'),
+    ('𝕒', '𝕫'),
+    ('𝖆', '𝖟'),
+    ('𝖺', '𝗓'),
+    ('𝗮', '𝘇'),
+    ('𝘢', '𝘻'),
+    ('𝙖', '𝙯'),
+    ('𝚊', '𝚥'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛡'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜛'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝕'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞏'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟉'),
+    ('𝟋', '𝟋'),
+    ('𞤢', '𞥃'),
+];
+
+pub const MATH: &'static [(char, char)] = &[
+    ('+', '+'),
+    ('<', '>'),
+    ('^', '^'),
+    ('|', '|'),
+    ('~', '~'),
+    ('¬', '¬'),
+    ('±', '±'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('ϐ', 'ϒ'),
+    ('ϕ', 'ϕ'),
+    ('ϰ', 'ϱ'),
+    ('ϴ', '϶'),
+    ('؆', '؈'),
+    ('‖', '‖'),
+    ('′', '‴'),
+    ('⁀', '⁀'),
+    ('⁄', '⁄'),
+    ('⁒', '⁒'),
+    ('\u{2061}', '\u{2064}'),
+    ('⁺', '⁾'),
+    ('₊', '₎'),
+    ('\u{20d0}', '\u{20dc}'),
+    ('\u{20e1}', '\u{20e1}'),
+    ('\u{20e5}', '\u{20e6}'),
+    ('\u{20eb}', '\u{20ef}'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('℘', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('ℨ', '℩'),
+    ('ℬ', 'ℭ'),
+    ('ℯ', 'ℱ'),
+    ('ℳ', 'ℸ'),
+    ('ℼ', 'ⅉ'),
+    ('⅋', '⅋'),
+    ('←', '↧'),
+    ('↩', '↮'),
+    ('↰', '↱'),
+    ('↶', '↷'),
+    ('↼', '⇛'),
+    ('⇝', '⇝'),
+    ('⇤', '⇥'),
+    ('⇴', '⋿'),
+    ('⌈', '⌋'),
+    ('⌠', '⌡'),
+    ('⍼', '⍼'),
+    ('⎛', '⎵'),
+    ('⎷', '⎷'),
+    ('⏐', '⏐'),
+    ('⏜', '⏢'),
+    ('■', '□'),
+    ('▮', '▷'),
+    ('▼', '◁'),
+    ('◆', '◇'),
+    ('◊', '○'),
+    ('●', '◓'),
+    ('◢', '◢'),
+    ('◤', '◤'),
+    ('◧', '◬'),
+    ('◸', '◿'),
+    ('★', '☆'),
+    ('♀', '♀'),
+    ('♂', '♂'),
+    ('♠', '♣'),
+    ('♭', '♯'),
+    ('⟀', '⟿'),
+    ('⤀', '⫿'),
+    ('⬰', '⭄'),
+    ('⭇', '⭌'),
+    ('﬩', '﬩'),
+    ('﹡', '﹦'),
+    ('﹨', '﹨'),
+    ('\u{ff0b}', '\u{ff0b}'),
+    ('\u{ff1c}', '\u{ff1e}'),
+    ('\u{ff3c}', '\u{ff3c}'),
+    ('\u{ff3e}', '\u{ff3e}'),
+    ('\u{ff5c}', '\u{ff5c}'),
+    ('\u{ff5e}', '\u{ff5e}'),
+    ('\u{ffe2}', '\u{ffe2}'),
+    ('\u{ffe9}', '\u{ffec}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𞻰', '𞻱'),
+];
+
+pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[
+    ('\u{fdd0}', '\u{fdef}'),
+    ('\u{fffe}', '\u{ffff}'),
+    ('\u{1fffe}', '\u{1ffff}'),
+    ('\u{2fffe}', '\u{2ffff}'),
+    ('\u{3fffe}', '\u{3ffff}'),
+    ('\u{4fffe}', '\u{4ffff}'),
+    ('\u{5fffe}', '\u{5ffff}'),
+    ('\u{6fffe}', '\u{6ffff}'),
+    ('\u{7fffe}', '\u{7ffff}'),
+    ('\u{8fffe}', '\u{8ffff}'),
+    ('\u{9fffe}', '\u{9ffff}'),
+    ('\u{afffe}', '\u{affff}'),
+    ('\u{bfffe}', '\u{bffff}'),
+    ('\u{cfffe}', '\u{cffff}'),
+    ('\u{dfffe}', '\u{dffff}'),
+    ('\u{efffe}', '\u{effff}'),
+    ('\u{ffffe}', '\u{fffff}'),
+    ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[
+    ('\u{345}', '\u{345}'),
+    ('\u{5b0}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{657}'),
+    ('\u{659}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6e1}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ed}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{73f}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{816}', '\u{817}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82c}'),
+    ('\u{8d4}', '\u{8df}'),
+    ('\u{8e3}', '\u{8e9}'),
+    ('\u{8f0}', 'ः'),
+    ('\u{93a}', 'ऻ'),
+    ('ा', 'ौ'),
+    ('ॎ', 'ॏ'),
+    ('\u{955}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', 'ঃ'),
+    ('\u{9be}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৌ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{a01}', 'ਃ'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4c}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('ા', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', 'ૌ'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{afc}'),
+    ('\u{b01}', 'ଃ'),
+    ('\u{b3e}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', 'ୌ'),
+    ('\u{b56}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', 'ௌ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', 'ః'),
+    ('\u{c3e}', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4c}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', 'ಃ'),
+    ('ಾ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccc}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', 'ഃ'),
+    ('\u{d3e}', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൌ'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', 'ඃ'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e4d}', '\u{e4d}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{eb9}'),
+    ('\u{ebb}', '\u{ebc}'),
+    ('\u{ecd}', '\u{ecd}'),
+    ('\u{f71}', '\u{f81}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('ါ', '\u{1036}'),
+    ('း', 'း'),
+    ('ျ', '\u{103e}'),
+    ('ၖ', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('ၢ', 'ၤ'),
+    ('ၧ', 'ၭ'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{108d}'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', '\u{109d}'),
+    ('\u{1712}', '\u{1713}'),
+    ('\u{1732}', '\u{1733}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('ា', 'ៈ'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', 'ᤸ'),
+    ('\u{1a17}', '\u{1a1b}'),
+    ('ᩕ', '\u{1a5e}'),
+    ('ᩡ', '\u{1a74}'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᬄ'),
+    ('\u{1b35}', 'ᭃ'),
+    ('\u{1b80}', 'ᮂ'),
+    ('ᮡ', '\u{1ba9}'),
+    ('\u{1bac}', '\u{1bad}'),
+    ('ᯧ', '\u{1bf1}'),
+    ('ᰤ', '\u{1c36}'),
+    ('\u{1de7}', '\u{1df4}'),
+    ('Ⓐ', 'ⓩ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{a674}', '\u{a67b}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('ꠣ', 'ꠧ'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', 'ꣃ'),
+    ('\u{a8c5}', '\u{a8c5}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92a}'),
+    ('\u{a947}', 'ꥒ'),
+    ('\u{a980}', 'ꦃ'),
+    ('ꦴ', 'ꦿ'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', 'ꩍ'),
+    ('ꩻ', 'ꩽ'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabe}'),
+    ('ꫫ', 'ꫯ'),
+    ('ꫵ', 'ꫵ'),
+    ('ꯣ', 'ꯪ'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('𑀀', '𑀂'),
+    ('\u{11038}', '\u{11045}'),
+    ('𑂂', '𑂂'),
+    ('𑂰', '𑂸'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{11132}'),
+    ('𑅅', '𑅆'),
+    ('\u{11180}', '𑆂'),
+    ('𑆳', '𑆿'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('𑈬', '\u{11234}'),
+    ('\u{11237}', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112e8}'),
+    ('\u{11300}', '𑌃'),
+    ('\u{1133e}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍌'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍢', '𑍣'),
+    ('𑐵', '𑑁'),
+    ('\u{11443}', '𑑅'),
+    ('\u{114b0}', '𑓁'),
+    ('\u{115af}', '\u{115b5}'),
+    ('𑖸', '𑖾'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('𑘰', '𑘾'),
+    ('\u{11640}', '\u{11640}'),
+    ('\u{116ab}', '\u{116b5}'),
+    ('\u{1171d}', '\u{1172a}'),
+    ('𑠬', '𑠸'),
+    ('\u{11930}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{1193c}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11942}'),
+    ('𑧑', '\u{119d7}'),
+    ('\u{119da}', '𑧟'),
+    ('𑧤', '𑧤'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a35}', '𑨹'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a51}', '\u{11a5b}'),
+    ('\u{11a8a}', '𑪗'),
+    ('𑰯', '\u{11c36}'),
+    ('\u{11c38}', '𑰾'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d41}'),
+    ('\u{11d43}', '\u{11d43}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('𑶊', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶖'),
+    ('\u{11ef3}', '𑻶'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('𖽑', '𖾇'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1bc9e}', '\u{1bc9e}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e947}', '\u{1e947}'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+];
+
+pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+    ('\u{34f}', '\u{34f}'),
+    ('ᅟ', 'ᅠ'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{2065}', '\u{2065}'),
+    ('ㅤ', 'ㅤ'),
+    ('\u{ffa0}', '\u{ffa0}'),
+    ('\u{fff0}', '\u{fff8}'),
+    ('\u{e0000}', '\u{e0000}'),
+    ('\u{e0002}', '\u{e001f}'),
+    ('\u{e0080}', '\u{e00ff}'),
+    ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[
+    ('\u{9be}', '\u{9be}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{b3e}', '\u{b3e}'),
+    ('\u{b57}', '\u{b57}'),
+    ('\u{bbe}', '\u{bbe}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{cc2}', '\u{cc2}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{d3e}', '\u{d3e}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{dcf}', '\u{dcf}'),
+    ('\u{ddf}', '\u{ddf}'),
+    ('\u{1b35}', '\u{1b35}'),
+    ('\u{200c}', '\u{200c}'),
+    ('\u{302e}', '\u{302f}'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('\u{1133e}', '\u{1133e}'),
+    ('\u{11357}', '\u{11357}'),
+    ('\u{114b0}', '\u{114b0}'),
+    ('\u{114bd}', '\u{114bd}'),
+    ('\u{115af}', '\u{115af}'),
+    ('\u{11930}', '\u{11930}'),
+    ('\u{1d165}', '\u{1d165}'),
+    ('\u{1d16e}', '\u{1d172}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const OTHER_ID_CONTINUE: &'static [(char, char)] =
+    &[('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚')];
+
+pub const OTHER_ID_START: &'static [(char, char)] =
+    &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')];
+
+pub const OTHER_LOWERCASE: &'static [(char, char)] = &[
+    ('ª', 'ª'),
+    ('º', 'º'),
+    ('ʰ', 'ʸ'),
+    ('ˀ', 'ˁ'),
+    ('ˠ', 'ˤ'),
+    ('\u{345}', '\u{345}'),
+    ('ͺ', 'ͺ'),
+    ('ᴬ', 'ᵪ'),
+    ('ᵸ', 'ᵸ'),
+    ('ᶛ', 'ᶿ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ⅰ', 'ⅿ'),
+    ('ⓐ', 'ⓩ'),
+    ('ⱼ', 'ⱽ'),
+    ('ꚜ', 'ꚝ'),
+    ('ꝰ', 'ꝰ'),
+    ('ꟸ', 'ꟹ'),
+    ('ꭜ', 'ꭟ'),
+];
+
+pub const OTHER_MATH: &'static [(char, char)] = &[
+    ('^', '^'),
+    ('ϐ', 'ϒ'),
+    ('ϕ', 'ϕ'),
+    ('ϰ', 'ϱ'),
+    ('ϴ', 'ϵ'),
+    ('‖', '‖'),
+    ('′', '‴'),
+    ('⁀', '⁀'),
+    ('\u{2061}', '\u{2064}'),
+    ('⁽', '⁾'),
+    ('₍', '₎'),
+    ('\u{20d0}', '\u{20dc}'),
+    ('\u{20e1}', '\u{20e1}'),
+    ('\u{20e5}', '\u{20e6}'),
+    ('\u{20eb}', '\u{20ef}'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('ℨ', '℩'),
+    ('ℬ', 'ℭ'),
+    ('ℯ', 'ℱ'),
+    ('ℳ', 'ℸ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('↕', '↙'),
+    ('↜', '↟'),
+    ('↡', '↢'),
+    ('↤', '↥'),
+    ('↧', '↧'),
+    ('↩', '↭'),
+    ('↰', '↱'),
+    ('↶', '↷'),
+    ('↼', '⇍'),
+    ('⇐', '⇑'),
+    ('⇓', '⇓'),
+    ('⇕', '⇛'),
+    ('⇝', '⇝'),
+    ('⇤', '⇥'),
+    ('⌈', '⌋'),
+    ('⎴', '⎵'),
+    ('⎷', '⎷'),
+    ('⏐', '⏐'),
+    ('⏢', '⏢'),
+    ('■', '□'),
+    ('▮', '▶'),
+    ('▼', '◀'),
+    ('◆', '◇'),
+    ('◊', '○'),
+    ('●', '◓'),
+    ('◢', '◢'),
+    ('◤', '◤'),
+    ('◧', '◬'),
+    ('★', '☆'),
+    ('♀', '♀'),
+    ('♂', '♂'),
+    ('♠', '♣'),
+    ('♭', '♮'),
+    ('⟅', '⟆'),
+    ('⟦', '⟯'),
+    ('⦃', '⦘'),
+    ('⧘', '⧛'),
+    ('⧼', '⧽'),
+    ('﹡', '﹡'),
+    ('﹣', '﹣'),
+    ('﹨', '﹨'),
+    ('\u{ff3c}', '\u{ff3c}'),
+    ('\u{ff3e}', '\u{ff3e}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+];
+
+pub const OTHER_UPPERCASE: &'static [(char, char)] =
+    &[('Ⅰ', 'Ⅿ'), ('Ⓐ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉')];
+
+pub const PATTERN_SYNTAX: &'static [(char, char)] = &[
+    ('!', '/'),
+    (':', '@'),
+    ('[', '^'),
+    ('`', '`'),
+    ('{', '~'),
+    ('¡', '§'),
+    ('©', '©'),
+    ('«', '¬'),
+    ('®', '®'),
+    ('°', '±'),
+    ('¶', '¶'),
+    ('»', '»'),
+    ('¿', '¿'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('‐', '‧'),
+    ('‰', '‾'),
+    ('⁁', '⁓'),
+    ('⁕', '⁞'),
+    ('←', '\u{245f}'),
+    ('─', '❵'),
+    ('➔', '⯿'),
+    ('⸀', '\u{2e7f}'),
+    ('、', '〃'),
+    ('〈', '〠'),
+    ('〰', '〰'),
+    ('﴾', '﴿'),
+    ('﹅', '﹆'),
+];
+
+pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[
+    ('\t', '\r'),
+    (' ', ' '),
+    ('\u{85}', '\u{85}'),
+    ('\u{200e}', '\u{200f}'),
+    ('\u{2028}', '\u{2029}'),
+];
+
+pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[
+    ('\u{600}', '\u{605}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+];
+
+pub const QUOTATION_MARK: &'static [(char, char)] = &[
+    ('\"', '\"'),
+    ('\'', '\''),
+    ('«', '«'),
+    ('»', '»'),
+    ('‘', '‟'),
+    ('‹', '›'),
+    ('⹂', '⹂'),
+    ('「', '』'),
+    ('〝', '〟'),
+    ('﹁', '﹄'),
+    ('"', '"'),
+    (''', '''),
+    ('「', '」'),
+];
+
+pub const RADICAL: &'static [(char, char)] =
+    &[('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕')];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
+
+pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[
+    ('!', '!'),
+    ('.', '.'),
+    ('?', '?'),
+    ('։', '։'),
+    ('؞', '؟'),
+    ('۔', '۔'),
+    ('܀', '܂'),
+    ('߹', '߹'),
+    ('࠷', '࠷'),
+    ('࠹', '࠹'),
+    ('࠽', '࠾'),
+    ('।', '॥'),
+    ('၊', '။'),
+    ('።', '።'),
+    ('፧', '፨'),
+    ('᙮', '᙮'),
+    ('᜵', '᜶'),
+    ('᠃', '᠃'),
+    ('᠉', '᠉'),
+    ('᥄', '᥅'),
+    ('᪨', '᪫'),
+    ('᭚', '᭛'),
+    ('᭞', '᭟'),
+    ('᰻', '᰼'),
+    ('᱾', '᱿'),
+    ('‼', '‽'),
+    ('⁇', '⁉'),
+    ('⸮', '⸮'),
+    ('⸼', '⸼'),
+    ('。', '。'),
+    ('꓿', '꓿'),
+    ('꘎', '꘏'),
+    ('꛳', '꛳'),
+    ('꛷', '꛷'),
+    ('꡶', '꡷'),
+    ('꣎', '꣏'),
+    ('꤯', '꤯'),
+    ('꧈', '꧉'),
+    ('꩝', '꩟'),
+    ('꫰', '꫱'),
+    ('꯫', '꯫'),
+    ('﹒', '﹒'),
+    ('﹖', '﹗'),
+    ('\u{ff01}', '\u{ff01}'),
+    ('\u{ff0e}', '\u{ff0e}'),
+    ('\u{ff1f}', '\u{ff1f}'),
+    ('\u{ff61}', '\u{ff61}'),
+    ('𐩖', '𐩗'),
+    ('𐽕', '𐽙'),
+    ('𑁇', '𑁈'),
+    ('𑂾', '𑃁'),
+    ('𑅁', '𑅃'),
+    ('𑇅', '𑇆'),
+    ('𑇍', '𑇍'),
+    ('𑇞', '𑇟'),
+    ('𑈸', '𑈹'),
+    ('𑈻', '𑈼'),
+    ('𑊩', '𑊩'),
+    ('𑑋', '𑑌'),
+    ('𑗂', '𑗃'),
+    ('𑗉', '𑗗'),
+    ('𑙁', '𑙂'),
+    ('𑜼', '𑜾'),
+    ('\u{11944}', '\u{11944}'),
+    ('\u{11946}', '\u{11946}'),
+    ('𑩂', '𑩃'),
+    ('𑪛', '𑪜'),
+    ('𑱁', '𑱂'),
+    ('𑻷', '𑻸'),
+    ('𖩮', '𖩯'),
+    ('𖫵', '𖫵'),
+    ('𖬷', '𖬸'),
+    ('𖭄', '𖭄'),
+    ('𖺘', '𖺘'),
+    ('𛲟', '𛲟'),
+    ('𝪈', '𝪈'),
+];
+
+pub const SOFT_DOTTED: &'static [(char, char)] = &[
+    ('i', 'j'),
+    ('į', 'į'),
+    ('ɉ', 'ɉ'),
+    ('ɨ', 'ɨ'),
+    ('ʝ', 'ʝ'),
+    ('ʲ', 'ʲ'),
+    ('ϳ', 'ϳ'),
+    ('і', 'і'),
+    ('ј', 'ј'),
+    ('ᵢ', 'ᵢ'),
+    ('ᶖ', 'ᶖ'),
+    ('ᶤ', 'ᶤ'),
+    ('ᶨ', 'ᶨ'),
+    ('ḭ', 'ḭ'),
+    ('ị', 'ị'),
+    ('ⁱ', 'ⁱ'),
+    ('ⅈ', 'ⅉ'),
+    ('ⱼ', 'ⱼ'),
+    ('𝐢', '𝐣'),
+    ('𝑖', '𝑗'),
+    ('𝒊', '𝒋'),
+    ('𝒾', '𝒿'),
+    ('𝓲', '𝓳'),
+    ('𝔦', '𝔧'),
+    ('𝕚', '𝕛'),
+    ('𝖎', '𝖏'),
+    ('𝗂', '𝗃'),
+    ('𝗶', '𝗷'),
+    ('𝘪', '𝘫'),
+    ('𝙞', '𝙟'),
+    ('𝚒', '𝚓'),
+];
+
+pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[
+    ('!', '!'),
+    (',', ','),
+    ('.', '.'),
+    (':', ';'),
+    ('?', '?'),
+    (';', ';'),
+    ('·', '·'),
+    ('։', '։'),
+    ('׃', '׃'),
+    ('،', '،'),
+    ('؛', '؛'),
+    ('؞', '؟'),
+    ('۔', '۔'),
+    ('܀', '܊'),
+    ('܌', '܌'),
+    ('߸', '߹'),
+    ('࠰', '࠾'),
+    ('࡞', '࡞'),
+    ('।', '॥'),
+    ('๚', '๛'),
+    ('༈', '༈'),
+    ('།', '༒'),
+    ('၊', '။'),
+    ('፡', '፨'),
+    ('᙮', '᙮'),
+    ('᛫', '᛭'),
+    ('᜵', '᜶'),
+    ('។', '៖'),
+    ('៚', '៚'),
+    ('᠂', '᠅'),
+    ('᠈', '᠉'),
+    ('᥄', '᥅'),
+    ('᪨', '᪫'),
+    ('᭚', '᭛'),
+    ('᭝', '᭟'),
+    ('᰻', '᰿'),
+    ('᱾', '᱿'),
+    ('‼', '‽'),
+    ('⁇', '⁉'),
+    ('⸮', '⸮'),
+    ('⸼', '⸼'),
+    ('⹁', '⹁'),
+    ('⹌', '⹌'),
+    ('⹎', '⹏'),
+    ('、', '。'),
+    ('꓾', '꓿'),
+    ('꘍', '꘏'),
+    ('꛳', '꛷'),
+    ('꡶', '꡷'),
+    ('꣎', '꣏'),
+    ('꤯', '꤯'),
+    ('꧇', '꧉'),
+    ('꩝', '꩟'),
+    ('꫟', '꫟'),
+    ('꫰', '꫱'),
+    ('꯫', '꯫'),
+    ('﹐', '﹒'),
+    ('﹔', '﹗'),
+    ('！', '！'),
+    ('，', '，'),
+    ('．', '．'),
+    ('：', '；'),
+    ('？', '？'),
+    ('｡', '｡'),
+    ('､', '､'),
+    ('𐎟', '𐎟'),
+    ('𐏐', '𐏐'),
+    ('𐡗', '𐡗'),
+    ('𐤟', '𐤟'),
+    ('𐩖', '𐩗'),
+    ('𐫰', '𐫵'),
+    ('𐬺', '𐬿'),
+    ('𐮙', '𐮜'),
+    ('𐽕', '𐽙'),
+    ('𑁇', '𑁍'),
+    ('𑂾', '𑃁'),
+    ('𑅁', '𑅃'),
+    ('𑇅', '𑇆'),
+    ('𑇍', '𑇍'),
+    ('𑇞', '𑇟'),
+    ('𑈸', '𑈼'),
+    ('𑊩', '𑊩'),
+    ('𑑋', '𑑍'),
+    ('\u{1145a}', '𑑛'),
+    ('𑗂', '𑗅'),
+    ('𑗉', '𑗗'),
+    ('𑙁', '𑙂'),
+    ('𑜼', '𑜾'),
+    ('\u{11944}', '\u{11944}'),
+    ('\u{11946}', '\u{11946}'),
+    ('𑩂', '𑩃'),
+    ('𑪛', '𑪜'),
+    ('𑪡', '𑪢'),
+    ('𑱁', '𑱃'),
+    ('𑱱', '𑱱'),
+    ('𑻷', '𑻸'),
+    ('𒑰', '𒑴'),
+    ('𖩮', '𖩯'),
+    ('𖫵', '𖫵'),
+    ('𖬷', '𖬹'),
+    ('𖭄', '𖭄'),
+    ('𖺗', '𖺘'),
+    ('𛲟', '𛲟'),
+    ('𝪇', '𝪊'),
+];
+
+pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('﨎', '﨏'),
+    ('﨑', '﨑'),
+    ('﨓', '﨔'),
+    ('﨟', '﨟'),
+    ('﨡', '﨡'),
+    ('﨣', '﨤'),
+    ('﨧', '﨩'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const UPPERCASE: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('À', 'Ö'),
+    ('Ø', 'Þ'),
+    ('Ā', 'Ā'),
+    ('Ă', 'Ă'),
+    ('Ą', 'Ą'),
+    ('Ć', 'Ć'),
+    ('Ĉ', 'Ĉ'),
+    ('Ċ', 'Ċ'),
+    ('Č', 'Č'),
+    ('Ď', 'Ď'),
+    ('Đ', 'Đ'),
+    ('Ē', 'Ē'),
+    ('Ĕ', 'Ĕ'),
+    ('Ė', 'Ė'),
+    ('Ę', 'Ę'),
+    ('Ě', 'Ě'),
+    ('Ĝ', 'Ĝ'),
+    ('Ğ', 'Ğ'),
+    ('Ġ', 'Ġ'),
+    ('Ģ', 'Ģ'),
+    ('Ĥ', 'Ĥ'),
+    ('Ħ', 'Ħ'),
+    ('Ĩ', 'Ĩ'),
+    ('Ī', 'Ī'),
+    ('Ĭ', 'Ĭ'),
+    ('Į', 'Į'),
+    ('İ', 'İ'),
+    ('Ĳ', 'Ĳ'),
+    ('Ĵ', 'Ĵ'),
+    ('Ķ', 'Ķ'),
+    ('Ĺ', 'Ĺ'),
+    ('Ļ', 'Ļ'),
+    ('Ľ', 'Ľ'),
+    ('Ŀ', 'Ŀ'),
+    ('Ł', 'Ł'),
+    ('Ń', 'Ń'),
+    ('Ņ', 'Ņ'),
+    ('Ň', 'Ň'),
+    ('Ŋ', 'Ŋ'),
+    ('Ō', 'Ō'),
+    ('Ŏ', 'Ŏ'),
+    ('Ő', 'Ő'),
+    ('Œ', 'Œ'),
+    ('Ŕ', 'Ŕ'),
+    ('Ŗ', 'Ŗ'),
+    ('Ř', 'Ř'),
+    ('Ś', 'Ś'),
+    ('Ŝ', 'Ŝ'),
+    ('Ş', 'Ş'),
+    ('Š', 'Š'),
+    ('Ţ', 'Ţ'),
+    ('Ť', 'Ť'),
+    ('Ŧ', 'Ŧ'),
+    ('Ũ', 'Ũ'),
+    ('Ū', 'Ū'),
+    ('Ŭ', 'Ŭ'),
+    ('Ů', 'Ů'),
+    ('Ű', 'Ű'),
+    ('Ų', 'Ų'),
+    ('Ŵ', 'Ŵ'),
+    ('Ŷ', 'Ŷ'),
+    ('Ÿ', 'Ź'),
+    ('Ż', 'Ż'),
+    ('Ž', 'Ž'),
+    ('Ɓ', 'Ƃ'),
+    ('Ƅ', 'Ƅ'),
+    ('Ɔ', 'Ƈ'),
+    ('Ɖ', 'Ƌ'),
+    ('Ǝ', 'Ƒ'),
+    ('Ɠ', 'Ɣ'),
+    ('Ɩ', 'Ƙ'),
+    ('Ɯ', 'Ɲ'),
+    ('Ɵ', 'Ơ'),
+    ('Ƣ', 'Ƣ'),
+    ('Ƥ', 'Ƥ'),
+    ('Ʀ', 'Ƨ'),
+    ('Ʃ', 'Ʃ'),
+    ('Ƭ', 'Ƭ'),
+    ('Ʈ', 'Ư'),
+    ('Ʊ', 'Ƴ'),
+    ('Ƶ', 'Ƶ'),
+    ('Ʒ', 'Ƹ'),
+    ('Ƽ', 'Ƽ'),
+    ('Ǆ', 'Ǆ'),
+    ('Ǉ', 'Ǉ'),
+    ('Ǌ', 'Ǌ'),
+    ('Ǎ', 'Ǎ'),
+    ('Ǐ', 'Ǐ'),
+    ('Ǒ', 'Ǒ'),
+    ('Ǔ', 'Ǔ'),
+    ('Ǖ', 'Ǖ'),
+    ('Ǘ', 'Ǘ'),
+    ('Ǚ', 'Ǚ'),
+    ('Ǜ', 'Ǜ'),
+    ('Ǟ', 'Ǟ'),
+    ('Ǡ', 'Ǡ'),
+    ('Ǣ', 'Ǣ'),
+    ('Ǥ', 'Ǥ'),
+    ('Ǧ', 'Ǧ'),
+    ('Ǩ', 'Ǩ'),
+    ('Ǫ', 'Ǫ'),
+    ('Ǭ', 'Ǭ'),
+    ('Ǯ', 'Ǯ'),
+    ('Ǳ', 'Ǳ'),
+    ('Ǵ', 'Ǵ'),
+    ('Ƕ', 'Ǹ'),
+    ('Ǻ', 'Ǻ'),
+    ('Ǽ', 'Ǽ'),
+    ('Ǿ', 'Ǿ'),
+    ('Ȁ', 'Ȁ'),
+    ('Ȃ', 'Ȃ'),
+    ('Ȅ', 'Ȅ'),
+    ('Ȇ', 'Ȇ'),
+    ('Ȉ', 'Ȉ'),
+    ('Ȋ', 'Ȋ'),
+    ('Ȍ', 'Ȍ'),
+    ('Ȏ', 'Ȏ'),
+    ('Ȑ', 'Ȑ'),
+    ('Ȓ', 'Ȓ'),
+    ('Ȕ', 'Ȕ'),
+    ('Ȗ', 'Ȗ'),
+    ('Ș', 'Ș'),
+    ('Ț', 'Ț'),
+    ('Ȝ', 'Ȝ'),
+    ('Ȟ', 'Ȟ'),
+    ('Ƞ', 'Ƞ'),
+    ('Ȣ', 'Ȣ'),
+    ('Ȥ', 'Ȥ'),
+    ('Ȧ', 'Ȧ'),
+    ('Ȩ', 'Ȩ'),
+    ('Ȫ', 'Ȫ'),
+    ('Ȭ', 'Ȭ'),
+    ('Ȯ', 'Ȯ'),
+    ('Ȱ', 'Ȱ'),
+    ('Ȳ', 'Ȳ'),
+    ('Ⱥ', 'Ȼ'),
+    ('Ƚ', 'Ⱦ'),
+    ('Ɂ', 'Ɂ'),
+    ('Ƀ', 'Ɇ'),
+    ('Ɉ', 'Ɉ'),
+    ('Ɋ', 'Ɋ'),
+    ('Ɍ', 'Ɍ'),
+    ('Ɏ', 'Ɏ'),
+    ('Ͱ', 'Ͱ'),
+    ('Ͳ', 'Ͳ'),
+    ('Ͷ', 'Ͷ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ώ'),
+    ('Α', 'Ρ'),
+    ('Σ', 'Ϋ'),
+    ('Ϗ', 'Ϗ'),
+    ('ϒ', 'ϔ'),
+    ('Ϙ', 'Ϙ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'Ϣ'),
+    ('Ϥ', 'Ϥ'),
+    ('Ϧ', 'Ϧ'),
+    ('Ϩ', 'Ϩ'),
+    ('Ϫ', 'Ϫ'),
+    ('Ϭ', 'Ϭ'),
+    ('Ϯ', 'Ϯ'),
+    ('ϴ', 'ϴ'),
+    ('Ϸ', 'Ϸ'),
+    ('Ϲ', 'Ϻ'),
+    ('Ͻ', 'Я'),
+    ('Ѡ', 'Ѡ'),
+    ('Ѣ', 'Ѣ'),
+    ('Ѥ', 'Ѥ'),
+    ('Ѧ', 'Ѧ'),
+    ('Ѩ', 'Ѩ'),
+    ('Ѫ', 'Ѫ'),
+    ('Ѭ', 'Ѭ'),
+    ('Ѯ', 'Ѯ'),
+    ('Ѱ', 'Ѱ'),
+    ('Ѳ', 'Ѳ'),
+    ('Ѵ', 'Ѵ'),
+    ('Ѷ', 'Ѷ'),
+    ('Ѹ', 'Ѹ'),
+    ('Ѻ', 'Ѻ'),
+    ('Ѽ', 'Ѽ'),
+    ('Ѿ', 'Ѿ'),
+    ('Ҁ', 'Ҁ'),
+    ('Ҋ', 'Ҋ'),
+    ('Ҍ', 'Ҍ'),
+    ('Ҏ', 'Ҏ'),
+    ('Ґ', 'Ґ'),
+    ('Ғ', 'Ғ'),
+    ('Ҕ', 'Ҕ'),
+    ('Җ', 'Җ'),
+    ('Ҙ', 'Ҙ'),
+    ('Қ', 'Қ'),
+    ('Ҝ', 'Ҝ'),
+    ('Ҟ', 'Ҟ'),
+    ('Ҡ', 'Ҡ'),
+    ('Ң', 'Ң'),
+    ('Ҥ', 'Ҥ'),
+    ('Ҧ', 'Ҧ'),
+    ('Ҩ', 'Ҩ'),
+    ('Ҫ', 'Ҫ'),
+    ('Ҭ', 'Ҭ'),
+    ('Ү', 'Ү'),
+    ('Ұ', 'Ұ'),
+    ('Ҳ', 'Ҳ'),
+    ('Ҵ', 'Ҵ'),
+    ('Ҷ', 'Ҷ'),
+    ('Ҹ', 'Ҹ'),
+    ('Һ', 'Һ'),
+    ('Ҽ', 'Ҽ'),
+    ('Ҿ', 'Ҿ'),
+    ('Ӏ', 'Ӂ'),
+    ('Ӄ', 'Ӄ'),
+    ('Ӆ', 'Ӆ'),
+    ('Ӈ', 'Ӈ'),
+    ('Ӊ', 'Ӊ'),
+    ('Ӌ', 'Ӌ'),
+    ('Ӎ', 'Ӎ'),
+    ('Ӑ', 'Ӑ'),
+    ('Ӓ', 'Ӓ'),
+    ('Ӕ', 'Ӕ'),
+    ('Ӗ', 'Ӗ'),
+    ('Ә', 'Ә'),
+    ('Ӛ', 'Ӛ'),
+    ('Ӝ', 'Ӝ'),
+    ('Ӟ', 'Ӟ'),
+    ('Ӡ', 'Ӡ'),
+    ('Ӣ', 'Ӣ'),
+    ('Ӥ', 'Ӥ'),
+    ('Ӧ', 'Ӧ'),
+    ('Ө', 'Ө'),
+    ('Ӫ', 'Ӫ'),
+    ('Ӭ', 'Ӭ'),
+    ('Ӯ', 'Ӯ'),
+    ('Ӱ', 'Ӱ'),
+    ('Ӳ', 'Ӳ'),
+    ('Ӵ', 'Ӵ'),
+    ('Ӷ', 'Ӷ'),
+    ('Ӹ', 'Ӹ'),
+    ('Ӻ', 'Ӻ'),
+    ('Ӽ', 'Ӽ'),
+    ('Ӿ', 'Ӿ'),
+    ('Ԁ', 'Ԁ'),
+    ('Ԃ', 'Ԃ'),
+    ('Ԅ', 'Ԅ'),
+    ('Ԇ', 'Ԇ'),
+    ('Ԉ', 'Ԉ'),
+    ('Ԋ', 'Ԋ'),
+    ('Ԍ', 'Ԍ'),
+    ('Ԏ', 'Ԏ'),
+    ('Ԑ', 'Ԑ'),
+    ('Ԓ', 'Ԓ'),
+    ('Ԕ', 'Ԕ'),
+    ('Ԗ', 'Ԗ'),
+    ('Ԙ', 'Ԙ'),
+    ('Ԛ', 'Ԛ'),
+    ('Ԝ', 'Ԝ'),
+    ('Ԟ', 'Ԟ'),
+    ('Ԡ', 'Ԡ'),
+    ('Ԣ', 'Ԣ'),
+    ('Ԥ', 'Ԥ'),
+    ('Ԧ', 'Ԧ'),
+    ('Ԩ', 'Ԩ'),
+    ('Ԫ', 'Ԫ'),
+    ('Ԭ', 'Ԭ'),
+    ('Ԯ', 'Ԯ'),
+    ('Ա', 'Ֆ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('Ḁ', 'Ḁ'),
+    ('Ḃ', 'Ḃ'),
+    ('Ḅ', 'Ḅ'),
+    ('Ḇ', 'Ḇ'),
+    ('Ḉ', 'Ḉ'),
+    ('Ḋ', 'Ḋ'),
+    ('Ḍ', 'Ḍ'),
+    ('Ḏ', 'Ḏ'),
+    ('Ḑ', 'Ḑ'),
+    ('Ḓ', 'Ḓ'),
+    ('Ḕ', 'Ḕ'),
+    ('Ḗ', 'Ḗ'),
+    ('Ḙ', 'Ḙ'),
+    ('Ḛ', 'Ḛ'),
+    ('Ḝ', 'Ḝ'),
+    ('Ḟ', 'Ḟ'),
+    ('Ḡ', 'Ḡ'),
+    ('Ḣ', 'Ḣ'),
+    ('Ḥ', 'Ḥ'),
+    ('Ḧ', 'Ḧ'),
+    ('Ḩ', 'Ḩ'),
+    ('Ḫ', 'Ḫ'),
+    ('Ḭ', 'Ḭ'),
+    ('Ḯ', 'Ḯ'),
+    ('Ḱ', 'Ḱ'),
+    ('Ḳ', 'Ḳ'),
+    ('Ḵ', 'Ḵ'),
+    ('Ḷ', 'Ḷ'),
+    ('Ḹ', 'Ḹ'),
+    ('Ḻ', 'Ḻ'),
+    ('Ḽ', 'Ḽ'),
+    ('Ḿ', 'Ḿ'),
+    ('Ṁ', 'Ṁ'),
+    ('Ṃ', 'Ṃ'),
+    ('Ṅ', 'Ṅ'),
+    ('Ṇ', 'Ṇ'),
+    ('Ṉ', 'Ṉ'),
+    ('Ṋ', 'Ṋ'),
+    ('Ṍ', 'Ṍ'),
+    ('Ṏ', 'Ṏ'),
+    ('Ṑ', 'Ṑ'),
+    ('Ṓ', 'Ṓ'),
+    ('Ṕ', 'Ṕ'),
+    ('Ṗ', 'Ṗ'),
+    ('Ṙ', 'Ṙ'),
+    ('Ṛ', 'Ṛ'),
+    ('Ṝ', 'Ṝ'),
+    ('Ṟ', 'Ṟ'),
+    ('Ṡ', 'Ṡ'),
+    ('Ṣ', 'Ṣ'),
+    ('Ṥ', 'Ṥ'),
+    ('Ṧ', 'Ṧ'),
+    ('Ṩ', 'Ṩ'),
+    ('Ṫ', 'Ṫ'),
+    ('Ṭ', 'Ṭ'),
+    ('Ṯ', 'Ṯ'),
+    ('Ṱ', 'Ṱ'),
+    ('Ṳ', 'Ṳ'),
+    ('Ṵ', 'Ṵ'),
+    ('Ṷ', 'Ṷ'),
+    ('Ṹ', 'Ṹ'),
+    ('Ṻ', 'Ṻ'),
+    ('Ṽ', 'Ṽ'),
+    ('Ṿ', 'Ṿ'),
+    ('Ẁ', 'Ẁ'),
+    ('Ẃ', 'Ẃ'),
+    ('Ẅ', 'Ẅ'),
+    ('Ẇ', 'Ẇ'),
+    ('Ẉ', 'Ẉ'),
+    ('Ẋ', 'Ẋ'),
+    ('Ẍ', 'Ẍ'),
+    ('Ẏ', 'Ẏ'),
+    ('Ẑ', 'Ẑ'),
+    ('Ẓ', 'Ẓ'),
+    ('Ẕ', 'Ẕ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'Ạ'),
+    ('Ả', 'Ả'),
+    ('Ấ', 'Ấ'),
+    ('Ầ', 'Ầ'),
+    ('Ẩ', 'Ẩ'),
+    ('Ẫ', 'Ẫ'),
+    ('Ậ', 'Ậ'),
+    ('Ắ', 'Ắ'),
+    ('Ằ', 'Ằ'),
+    ('Ẳ', 'Ẳ'),
+    ('Ẵ', 'Ẵ'),
+    ('Ặ', 'Ặ'),
+    ('Ẹ', 'Ẹ'),
+    ('Ẻ', 'Ẻ'),
+    ('Ẽ', 'Ẽ'),
+    ('Ế', 'Ế'),
+    ('Ề', 'Ề'),
+    ('Ể', 'Ể'),
+    ('Ễ', 'Ễ'),
+    ('Ệ', 'Ệ'),
+    ('Ỉ', 'Ỉ'),
+    ('Ị', 'Ị'),
+    ('Ọ', 'Ọ'),
+    ('Ỏ', 'Ỏ'),
+    ('Ố', 'Ố'),
+    ('Ồ', 'Ồ'),
+    ('Ổ', 'Ổ'),
+    ('Ỗ', 'Ỗ'),
+    ('Ộ', 'Ộ'),
+    ('Ớ', 'Ớ'),
+    ('Ờ', 'Ờ'),
+    ('Ở', 'Ở'),
+    ('Ỡ', 'Ỡ'),
+    ('Ợ', 'Ợ'),
+    ('Ụ', 'Ụ'),
+    ('Ủ', 'Ủ'),
+    ('Ứ', 'Ứ'),
+    ('Ừ', 'Ừ'),
+    ('Ử', 'Ử'),
+    ('Ữ', 'Ữ'),
+    ('Ự', 'Ự'),
+    ('Ỳ', 'Ỳ'),
+    ('Ỵ', 'Ỵ'),
+    ('Ỷ', 'Ỷ'),
+    ('Ỹ', 'Ỹ'),
+    ('Ỻ', 'Ỻ'),
+    ('Ỽ', 'Ỽ'),
+    ('Ỿ', 'Ỿ'),
+    ('Ἀ', 'Ἇ'),
+    ('Ἐ', 'Ἕ'),
+    ('Ἠ', 'Ἧ'),
+    ('Ἰ', 'Ἷ'),
+    ('Ὀ', 'Ὅ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'Ὗ'),
+    ('Ὠ', 'Ὧ'),
+    ('Ᾰ', 'Ά'),
+    ('Ὲ', 'Ή'),
+    ('Ῐ', 'Ί'),
+    ('Ῠ', 'Ῥ'),
+    ('Ὸ', 'Ώ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℋ', 'ℍ'),
+    ('ℐ', 'ℒ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℰ', 'ℳ'),
+    ('ℾ', 'ℿ'),
+    ('ⅅ', 'ⅅ'),
+    ('Ⅰ', 'Ⅿ'),
+    ('Ↄ', 'Ↄ'),
+    ('Ⓐ', 'Ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('Ⱡ', 'Ⱡ'),
+    ('Ɫ', 'Ɽ'),
+    ('Ⱨ', 'Ⱨ'),
+    ('Ⱪ', 'Ⱪ'),
+    ('Ⱬ', 'Ⱬ'),
+    ('Ɑ', 'Ɒ'),
+    ('Ⱳ', 'Ⱳ'),
+    ('Ⱶ', 'Ⱶ'),
+    ('Ȿ', 'Ⲁ'),
+    ('Ⲃ', 'Ⲃ'),
+    ('Ⲅ', 'Ⲅ'),
+    ('Ⲇ', 'Ⲇ'),
+    ('Ⲉ', 'Ⲉ'),
+    ('Ⲋ', 'Ⲋ'),
+    ('Ⲍ', 'Ⲍ'),
+    ('Ⲏ', 'Ⲏ'),
+    ('Ⲑ', 'Ⲑ'),
+    ('Ⲓ', 'Ⲓ'),
+    ('Ⲕ', 'Ⲕ'),
+    ('Ⲗ', 'Ⲗ'),
+    ('Ⲙ', 'Ⲙ'),
+    ('Ⲛ', 'Ⲛ'),
+    ('Ⲝ', 'Ⲝ'),
+    ('Ⲟ', 'Ⲟ'),
+    ('Ⲡ', 'Ⲡ'),
+    ('Ⲣ', 'Ⲣ'),
+    ('Ⲥ', 'Ⲥ'),
+    ('Ⲧ', 'Ⲧ'),
+    ('Ⲩ', 'Ⲩ'),
+    ('Ⲫ', 'Ⲫ'),
+    ('Ⲭ', 'Ⲭ'),
+    ('Ⲯ', 'Ⲯ'),
+    ('Ⲱ', 'Ⲱ'),
+    ('Ⲳ', 'Ⲳ'),
+    ('Ⲵ', 'Ⲵ'),
+    ('Ⲷ', 'Ⲷ'),
+    ('Ⲹ', 'Ⲹ'),
+    ('Ⲻ', 'Ⲻ'),
+    ('Ⲽ', 'Ⲽ'),
+    ('Ⲿ', 'Ⲿ'),
+    ('Ⳁ', 'Ⳁ'),
+    ('Ⳃ', 'Ⳃ'),
+    ('Ⳅ', 'Ⳅ'),
+    ('Ⳇ', 'Ⳇ'),
+    ('Ⳉ', 'Ⳉ'),
+    ('Ⳋ', 'Ⳋ'),
+    ('Ⳍ', 'Ⳍ'),
+    ('Ⳏ', 'Ⳏ'),
+    ('Ⳑ', 'Ⳑ'),
+    ('Ⳓ', 'Ⳓ'),
+    ('Ⳕ', 'Ⳕ'),
+    ('Ⳗ', 'Ⳗ'),
+    ('Ⳙ', 'Ⳙ'),
+    ('Ⳛ', 'Ⳛ'),
+    ('Ⳝ', 'Ⳝ'),
+    ('Ⳟ', 'Ⳟ'),
+    ('Ⳡ', 'Ⳡ'),
+    ('Ⳣ', 'Ⳣ'),
+    ('Ⳬ', 'Ⳬ'),
+    ('Ⳮ', 'Ⳮ'),
+    ('Ⳳ', 'Ⳳ'),
+    ('Ꙁ', 'Ꙁ'),
+    ('Ꙃ', 'Ꙃ'),
+    ('Ꙅ', 'Ꙅ'),
+    ('Ꙇ', 'Ꙇ'),
+    ('Ꙉ', 'Ꙉ'),
+    ('Ꙋ', 'Ꙋ'),
+    ('Ꙍ', 'Ꙍ'),
+    ('Ꙏ', 'Ꙏ'),
+    ('Ꙑ', 'Ꙑ'),
+    ('Ꙓ', 'Ꙓ'),
+    ('Ꙕ', 'Ꙕ'),
+    ('Ꙗ', 'Ꙗ'),
+    ('Ꙙ', 'Ꙙ'),
+    ('Ꙛ', 'Ꙛ'),
+    ('Ꙝ', 'Ꙝ'),
+    ('Ꙟ', 'Ꙟ'),
+    ('Ꙡ', 'Ꙡ'),
+    ('Ꙣ', 'Ꙣ'),
+    ('Ꙥ', 'Ꙥ'),
+    ('Ꙧ', 'Ꙧ'),
+    ('Ꙩ', 'Ꙩ'),
+    ('Ꙫ', 'Ꙫ'),
+    ('Ꙭ', 'Ꙭ'),
+    ('Ꚁ', 'Ꚁ'),
+    ('Ꚃ', 'Ꚃ'),
+    ('Ꚅ', 'Ꚅ'),
+    ('Ꚇ', 'Ꚇ'),
+    ('Ꚉ', 'Ꚉ'),
+    ('Ꚋ', 'Ꚋ'),
+    ('Ꚍ', 'Ꚍ'),
+    ('Ꚏ', 'Ꚏ'),
+    ('Ꚑ', 'Ꚑ'),
+    ('Ꚓ', 'Ꚓ'),
+    ('Ꚕ', 'Ꚕ'),
+    ('Ꚗ', 'Ꚗ'),
+    ('Ꚙ', 'Ꚙ'),
+    ('Ꚛ', 'Ꚛ'),
+    ('Ꜣ', 'Ꜣ'),
+    ('Ꜥ', 'Ꜥ'),
+    ('Ꜧ', 'Ꜧ'),
+    ('Ꜩ', 'Ꜩ'),
+    ('Ꜫ', 'Ꜫ'),
+    ('Ꜭ', 'Ꜭ'),
+    ('Ꜯ', 'Ꜯ'),
+    ('Ꜳ', 'Ꜳ'),
+    ('Ꜵ', 'Ꜵ'),
+    ('Ꜷ', 'Ꜷ'),
+    ('Ꜹ', 'Ꜹ'),
+    ('Ꜻ', 'Ꜻ'),
+    ('Ꜽ', 'Ꜽ'),
+    ('Ꜿ', 'Ꜿ'),
+    ('Ꝁ', 'Ꝁ'),
+    ('Ꝃ', 'Ꝃ'),
+    ('Ꝅ', 'Ꝅ'),
+    ('Ꝇ', 'Ꝇ'),
+    ('Ꝉ', 'Ꝉ'),
+    ('Ꝋ', 'Ꝋ'),
+    ('Ꝍ', 'Ꝍ'),
+    ('Ꝏ', 'Ꝏ'),
+    ('Ꝑ', 'Ꝑ'),
+    ('Ꝓ', 'Ꝓ'),
+    ('Ꝕ', 'Ꝕ'),
+    ('Ꝗ', 'Ꝗ'),
+    ('Ꝙ', 'Ꝙ'),
+    ('Ꝛ', 'Ꝛ'),
+    ('Ꝝ', 'Ꝝ'),
+    ('Ꝟ', 'Ꝟ'),
+    ('Ꝡ', 'Ꝡ'),
+    ('Ꝣ', 'Ꝣ'),
+    ('Ꝥ', 'Ꝥ'),
+    ('Ꝧ', 'Ꝧ'),
+    ('Ꝩ', 'Ꝩ'),
+    ('Ꝫ', 'Ꝫ'),
+    ('Ꝭ', 'Ꝭ'),
+    ('Ꝯ', 'Ꝯ'),
+    ('Ꝺ', 'Ꝺ'),
+    ('Ꝼ', 'Ꝼ'),
+    ('Ᵹ', 'Ꝿ'),
+    ('Ꞁ', 'Ꞁ'),
+    ('Ꞃ', 'Ꞃ'),
+    ('Ꞅ', 'Ꞅ'),
+    ('Ꞇ', 'Ꞇ'),
+    ('Ꞌ', 'Ꞌ'),
+    ('Ɥ', 'Ɥ'),
+    ('Ꞑ', 'Ꞑ'),
+    ('Ꞓ', 'Ꞓ'),
+    ('Ꞗ', 'Ꞗ'),
+    ('Ꞙ', 'Ꞙ'),
+    ('Ꞛ', 'Ꞛ'),
+    ('Ꞝ', 'Ꞝ'),
+    ('Ꞟ', 'Ꞟ'),
+    ('Ꞡ', 'Ꞡ'),
+    ('Ꞣ', 'Ꞣ'),
+    ('Ꞥ', 'Ꞥ'),
+    ('Ꞧ', 'Ꞧ'),
+    ('Ꞩ', 'Ꞩ'),
+    ('Ɦ', 'Ɪ'),
+    ('Ʞ', 'Ꞵ'),
+    ('Ꞷ', 'Ꞷ'),
+    ('Ꞹ', 'Ꞹ'),
+    ('Ꞻ', 'Ꞻ'),
+    ('Ꞽ', 'Ꞽ'),
+    ('Ꞿ', 'Ꞿ'),
+    ('Ꟃ', 'Ꟃ'),
+    ('Ꞔ', '\u{a7c7}'),
+    ('\u{a7c9}', '\u{a7c9}'),
+    ('\u{a7f5}', '\u{a7f5}'),
+    ('Ａ', 'Ｚ'),
+    ('𐐀', '𐐧'),
+    ('𐒰', '𐓓'),
+    ('𐲀', '𐲲'),
+    ('𑢠', '𑢿'),
+    ('𖹀', '𖹟'),
+    ('𝐀', '𝐙'),
+    ('𝐴', '𝑍'),
+    ('𝑨', '𝒁'),
+    ('𝒜', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒵'),
+    ('𝓐', '𝓩'),
+    ('𝔄', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔸', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕬', '𝖅'),
+    ('𝖠', '𝖹'),
+    ('𝗔', '𝗭'),
+    ('𝘈', '𝘡'),
+    ('𝘼', '𝙕'),
+    ('𝙰', '𝚉'),
+    ('𝚨', '𝛀'),
+    ('𝛢', '𝛺'),
+    ('𝜜', '𝜴'),
+    ('𝝖', '𝝮'),
+    ('𝞐', '𝞨'),
+    ('𝟊', '𝟊'),
+    ('𞤀', '𞤡'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+];
+
+pub const VARIATION_SELECTOR: &'static [(char, char)] = &[
+    ('\u{180b}', '\u{180d}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+    ('\t', '\r'),
+    (' ', ' '),
+    ('\u{85}', '\u{85}'),
+    ('\u{a0}', '\u{a0}'),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{200a}'),
+    ('\u{2028}', '\u{2029}'),
+    ('\u{202f}', '\u{202f}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
+
+pub const XID_CONTINUE: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('A', 'Z'),
+    ('_', '_'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('·', '·'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('\u{300}', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('\u{483}', '\u{487}'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('\u{610}', '\u{61a}'),
+    ('ؠ', '٩'),
+    ('ٮ', 'ۓ'),
+    ('ە', '\u{6dc}'),
+    ('\u{6df}', '\u{6e8}'),
+    ('\u{6ea}', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', '\u{74a}'),
+    ('ݍ', 'ޱ'),
+    ('߀', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('ࠀ', '\u{82d}'),
+    ('ࡀ', '\u{85b}'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{963}'),
+    ('०', '९'),
+    ('ॱ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('૦', '૯'),
+    ('ૹ', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('\u{b3c}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('୦', '୯'),
+    ('ୱ', 'ୱ'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௦', '௯'),
+    ('\u{c00}', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('౦', '౯'),
+    ('ಀ', 'ಃ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('\u{cbc}', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', 'ൎ'),
+    ('ൔ', '\u{d57}'),
+    ('ൟ', '\u{d63}'),
+    ('൦', '൯'),
+    ('ൺ', 'ൿ'),
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('෦', '෯'),
+    ('ෲ', 'ෳ'),
+    ('ก', '\u{e3a}'),
+    ('เ', '\u{e4e}'),
+    ('๐', '๙'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('\u{f18}', '\u{f19}'),
+    ('༠', '༩'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('က', '၉'),
+    ('ၐ', '\u{109d}'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('\u{135d}', '\u{135f}'),
+    ('፩', '፱'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', '\u{1714}'),
+    ('ᜠ', '\u{1734}'),
+    ('ᝀ', '\u{1753}'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('\u{1772}', '\u{1773}'),
+    ('ក', '\u{17d3}'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', '\u{17dd}'),
+    ('០', '៩'),
+    ('\u{180b}', '\u{180d}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥆', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('᧐', '᧚'),
+    ('ᨀ', '\u{1a1b}'),
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('ᪧ', 'ᪧ'),
+    ('\u{1ab0}', '\u{1abd}'),
+    ('\u{1abf}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᭋ'),
+    ('᭐', '᭙'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', '᯳'),
+    ('ᰀ', '\u{1c37}'),
+    ('᱀', '᱉'),
+    ('ᱍ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', 'ᳺ'),
+    ('ᴀ', '\u{1df9}'),
+    ('\u{1dfb}', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('‿', '⁀'),
+    ('⁔', '⁔'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('\u{20d0}', '\u{20dc}'),
+    ('\u{20e1}', '\u{20e1}'),
+    ('\u{20e5}', '\u{20f0}'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('℘', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('\u{2d7f}', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('々', '〇'),
+    ('〡', '\u{302f}'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('\u{3099}', '\u{309a}'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘫ'),
+    ('Ꙁ', '\u{a66f}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('ꙿ', '\u{a6f1}'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢀ', '\u{a8c5}'),
+    ('꣐', '꣙'),
+    ('\u{a8e0}', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', '\u{a92d}'),
+    ('ꤰ', '꥓'),
+    ('ꥠ', 'ꥼ'),
+    ('\u{a980}', '꧀'),
+    ('ꧏ', '꧙'),
+    ('ꧠ', 'ꧾ'),
+    ('ꨀ', '\u{aa36}'),
+    ('ꩀ', 'ꩍ'),
+    ('꩐', '꩙'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫯ'),
+    ('ꫲ', '\u{aaf6}'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('꯰', '꯹'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ﬀ', 'ﬆ'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﱝ'),
+    ('ﱤ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷹ'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('︳', '︴'),
+    ('﹍', '﹏'),
+    ('ﹱ', 'ﹱ'),
+    ('ﹳ', 'ﹳ'),
+    ('ﹷ', 'ﹷ'),
+    ('ﹹ', 'ﹹ'),
+    ('ﹻ', 'ﹻ'),
+    ('ﹽ', 'ﹽ'),
+    ('ﹿ', 'ﻼ'),
+    ('０', '９'),
+    ('Ａ', 'Ｚ'),
+    ('＿', '＿'),
+    ('ａ', 'ｚ'),
+    ('ｦ', 'ﾾ'),
+    ('ￂ', 'ￇ'),
+    ('ￊ', 'ￏ'),
+    ('ￒ', 'ￗ'),
+    ('ￚ', 'ￜ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '\u{1037a}'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒠', '𐒩'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '\u{10ae6}'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '\u{10d27}'),
+    ('𐴰', '𐴹'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '\u{10f50}'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀀', '\u{11046}'),
+    ('𑁦', '𑁯'),
+    ('\u{1107f}', '\u{110ba}'),
+    ('𑃐', '𑃨'),
+    ('𑃰', '𑃹'),
+    ('\u{11100}', '\u{11134}'),
+    ('𑄶', '𑄿'),
+    ('𑅄', '\u{11147}'),
+    ('𑅐', '\u{11173}'),
+    ('𑅶', '𑅶'),
+    ('\u{11180}', '𑇄'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '\u{112ea}'),
+    ('𑋰', '𑋹'),
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133b}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐀', '𑑊'),
+    ('𑑐', '𑑙'),
+    ('\u{1145e}', '\u{11461}'),
+    ('𑒀', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑓐', '𑓙'),
+    ('𑖀', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('𑗘', '\u{115dd}'),
+    ('𑘀', '\u{11640}'),
+    ('𑙄', '𑙄'),
+    ('𑙐', '𑙙'),
+    ('𑚀', '𑚸'),
+    ('𑛀', '𑛉'),
+    ('𑜀', '𑜚'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑜰', '𑜹'),
+    ('𑠀', '\u{1183a}'),
+    ('𑢠', '𑣩'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11943}'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧡'),
+    ('𑧣', '𑧤'),
+    ('𑨀', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('𑩐', '\u{11a99}'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '\u{11c36}'),
+    ('\u{11c38}', '𑱀'),
+    ('𑱐', '𑱙'),
+    ('𑱲', '𑲏'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+    ('𑻠', '𑻶'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖩠', '𖩩'),
+    ('𖫐', '𖫭'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('𖬀', '\u{16b36}'),
+    ('𖭀', '𖭃'),
+    ('𖭐', '𖭙'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('\u{16f4f}', '𖾇'),
+    ('\u{16f8f}', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('𞄀', '𞄬'),
+    ('\u{1e130}', '𞄽'),
+    ('𞅀', '𞅉'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋹'),
+    ('𞠀', '𞣄'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('𞤀', '𞥋'),
+    ('𞥐', '𞥙'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const XID_START: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ˁ'),
+    ('ˆ', 'ˑ'),
+    ('ˠ', 'ˤ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('Ͱ', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͻ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', 'ՙ'),
+    ('ՠ', 'ֈ'),
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('ؠ', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۥ', 'ۦ'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॱ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ก', 'ะ'),
+    ('า', 'า'),
+    ('เ', 'ๆ'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'າ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('က', 'ဪ'),
+    ('ဿ', 'ဿ'),
+    ('ၐ', 'ၕ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၡ'),
+    ('ၥ', 'ၦ'),
+    ('ၮ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႎ', 'ႎ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᨠ', 'ᩔ'),
+    ('ᪧ', 'ᪧ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('℘', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('々', '〇'),
+    ('〡', '〩'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('ꙿ', 'ꚝ'),
+    ('ꚠ', 'ꛯ'),
+    ('ꜗ', 'ꜟ'),
+    ('Ꜣ', 'ꞈ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧠ', 'ꧤ'),
+    ('ꧦ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꩺ'),
+    ('ꩾ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫴ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab69}'),
+    ('ꭰ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('ﬀ', 'ﬆ'),
+    ('ﬓ', 'ﬗ'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﱝ'),
+    ('ﱤ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷹ'),
+    ('ﹱ', 'ﹱ'),
+    ('ﹳ', 'ﹳ'),
+    ('ﹷ', 'ﹷ'),
+    ('ﹹ', 'ﹹ'),
+    ('ﹻ', 'ﹻ'),
+    ('ﹽ', 'ﹽ'),
+    ('ﹿ', 'ﻼ'),
+    ('Ａ', 'Ｚ'),
+    ('ａ', 'ｚ'),
+    ('ｦ', 'ﾝ'),
+    ('ﾠ', 'ﾾ'),
+    ('ￂ', 'ￇ'),
+    ('ￊ', 'ￏ'),
+    ('ￒ', 'ￗ'),
+    ('ￚ', 'ￜ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('𑠀', '𑠫'),
+    ('𑢠', '𑣟'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞤀', '𞥃'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
diff --git a/src/unicode_tables/property_names.rs b/src/unicode_tables/property_names.rs
new file mode 100644
index 0000000..19ca856
--- /dev/null
+++ b/src/unicode_tables/property_names.rs
@@ -0,0 +1,262 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-names ucd-13.0.0
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
+    ("age", "Age"),
+    ("ahex", "ASCII_Hex_Digit"),
+    ("alpha", "Alphabetic"),
+    ("alphabetic", "Alphabetic"),
+    ("asciihexdigit", "ASCII_Hex_Digit"),
+    ("bc", "Bidi_Class"),
+    ("bidic", "Bidi_Control"),
+    ("bidiclass", "Bidi_Class"),
+    ("bidicontrol", "Bidi_Control"),
+    ("bidim", "Bidi_Mirrored"),
+    ("bidimirrored", "Bidi_Mirrored"),
+    ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
+    ("bidipairedbracket", "Bidi_Paired_Bracket"),
+    ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"),
+    ("blk", "Block"),
+    ("block", "Block"),
+    ("bmg", "Bidi_Mirroring_Glyph"),
+    ("bpb", "Bidi_Paired_Bracket"),
+    ("bpt", "Bidi_Paired_Bracket_Type"),
+    ("canonicalcombiningclass", "Canonical_Combining_Class"),
+    ("cased", "Cased"),
+    ("casefolding", "Case_Folding"),
+    ("caseignorable", "Case_Ignorable"),
+    ("ccc", "Canonical_Combining_Class"),
+    ("ce", "Composition_Exclusion"),
+    ("cf", "Case_Folding"),
+    ("changeswhencasefolded", "Changes_When_Casefolded"),
+    ("changeswhencasemapped", "Changes_When_Casemapped"),
+    ("changeswhenlowercased", "Changes_When_Lowercased"),
+    ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
+    ("changeswhentitlecased", "Changes_When_Titlecased"),
+    ("changeswhenuppercased", "Changes_When_Uppercased"),
+    ("ci", "Case_Ignorable"),
+    ("cjkaccountingnumeric", "kAccountingNumeric"),
+    ("cjkcompatibilityvariant", "kCompatibilityVariant"),
+    ("cjkiicore", "kIICore"),
+    ("cjkirggsource", "kIRG_GSource"),
+    ("cjkirghsource", "kIRG_HSource"),
+    ("cjkirgjsource", "kIRG_JSource"),
+    ("cjkirgkpsource", "kIRG_KPSource"),
+    ("cjkirgksource", "kIRG_KSource"),
+    ("cjkirgmsource", "kIRG_MSource"),
+    ("cjkirgssource", "kIRG_SSource"),
+    ("cjkirgtsource", "kIRG_TSource"),
+    ("cjkirguksource", "kIRG_UKSource"),
+    ("cjkirgusource", "kIRG_USource"),
+    ("cjkirgvsource", "kIRG_VSource"),
+    ("cjkothernumeric", "kOtherNumeric"),
+    ("cjkprimarynumeric", "kPrimaryNumeric"),
+    ("cjkrsunicode", "kRSUnicode"),
+    ("compex", "Full_Composition_Exclusion"),
+    ("compositionexclusion", "Composition_Exclusion"),
+    ("cwcf", "Changes_When_Casefolded"),
+    ("cwcm", "Changes_When_Casemapped"),
+    ("cwkcf", "Changes_When_NFKC_Casefolded"),
+    ("cwl", "Changes_When_Lowercased"),
+    ("cwt", "Changes_When_Titlecased"),
+    ("cwu", "Changes_When_Uppercased"),
+    ("dash", "Dash"),
+    ("decompositionmapping", "Decomposition_Mapping"),
+    ("decompositiontype", "Decomposition_Type"),
+    ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
+    ("dep", "Deprecated"),
+    ("deprecated", "Deprecated"),
+    ("di", "Default_Ignorable_Code_Point"),
+    ("dia", "Diacritic"),
+    ("diacritic", "Diacritic"),
+    ("dm", "Decomposition_Mapping"),
+    ("dt", "Decomposition_Type"),
+    ("ea", "East_Asian_Width"),
+    ("eastasianwidth", "East_Asian_Width"),
+    ("ebase", "Emoji_Modifier_Base"),
+    ("ecomp", "Emoji_Component"),
+    ("emod", "Emoji_Modifier"),
+    ("emoji", "Emoji"),
+    ("emojicomponent", "Emoji_Component"),
+    ("emojimodifier", "Emoji_Modifier"),
+    ("emojimodifierbase", "Emoji_Modifier_Base"),
+    ("emojipresentation", "Emoji_Presentation"),
+    ("epres", "Emoji_Presentation"),
+    ("equideo", "Equivalent_Unified_Ideograph"),
+    ("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"),
+    ("expandsonnfc", "Expands_On_NFC"),
+    ("expandsonnfd", "Expands_On_NFD"),
+    ("expandsonnfkc", "Expands_On_NFKC"),
+    ("expandsonnfkd", "Expands_On_NFKD"),
+    ("ext", "Extender"),
+    ("extendedpictographic", "Extended_Pictographic"),
+    ("extender", "Extender"),
+    ("extpict", "Extended_Pictographic"),
+    ("fcnfkc", "FC_NFKC_Closure"),
+    ("fcnfkcclosure", "FC_NFKC_Closure"),
+    ("fullcompositionexclusion", "Full_Composition_Exclusion"),
+    ("gc", "General_Category"),
+    ("gcb", "Grapheme_Cluster_Break"),
+    ("generalcategory", "General_Category"),
+    ("graphemebase", "Grapheme_Base"),
+    ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
+    ("graphemeextend", "Grapheme_Extend"),
+    ("graphemelink", "Grapheme_Link"),
+    ("grbase", "Grapheme_Base"),
+    ("grext", "Grapheme_Extend"),
+    ("grlink", "Grapheme_Link"),
+    ("hangulsyllabletype", "Hangul_Syllable_Type"),
+    ("hex", "Hex_Digit"),
+    ("hexdigit", "Hex_Digit"),
+    ("hst", "Hangul_Syllable_Type"),
+    ("hyphen", "Hyphen"),
+    ("idc", "ID_Continue"),
+    ("idcontinue", "ID_Continue"),
+    ("ideo", "Ideographic"),
+    ("ideographic", "Ideographic"),
+    ("ids", "ID_Start"),
+    ("idsb", "IDS_Binary_Operator"),
+    ("idsbinaryoperator", "IDS_Binary_Operator"),
+    ("idst", "IDS_Trinary_Operator"),
+    ("idstart", "ID_Start"),
+    ("idstrinaryoperator", "IDS_Trinary_Operator"),
+    ("indicpositionalcategory", "Indic_Positional_Category"),
+    ("indicsyllabiccategory", "Indic_Syllabic_Category"),
+    ("inpc", "Indic_Positional_Category"),
+    ("insc", "Indic_Syllabic_Category"),
+    ("isc", "ISO_Comment"),
+    ("jamoshortname", "Jamo_Short_Name"),
+    ("jg", "Joining_Group"),
+    ("joinc", "Join_Control"),
+    ("joincontrol", "Join_Control"),
+    ("joininggroup", "Joining_Group"),
+    ("joiningtype", "Joining_Type"),
+    ("jsn", "Jamo_Short_Name"),
+    ("jt", "Joining_Type"),
+    ("kaccountingnumeric", "kAccountingNumeric"),
+    ("kcompatibilityvariant", "kCompatibilityVariant"),
+    ("kiicore", "kIICore"),
+    ("kirggsource", "kIRG_GSource"),
+    ("kirghsource", "kIRG_HSource"),
+    ("kirgjsource", "kIRG_JSource"),
+    ("kirgkpsource", "kIRG_KPSource"),
+    ("kirgksource", "kIRG_KSource"),
+    ("kirgmsource", "kIRG_MSource"),
+    ("kirgssource", "kIRG_SSource"),
+    ("kirgtsource", "kIRG_TSource"),
+    ("kirguksource", "kIRG_UKSource"),
+    ("kirgusource", "kIRG_USource"),
+    ("kirgvsource", "kIRG_VSource"),
+    ("kothernumeric", "kOtherNumeric"),
+    ("kprimarynumeric", "kPrimaryNumeric"),
+    ("krsunicode", "kRSUnicode"),
+    ("lb", "Line_Break"),
+    ("lc", "Lowercase_Mapping"),
+    ("linebreak", "Line_Break"),
+    ("loe", "Logical_Order_Exception"),
+    ("logicalorderexception", "Logical_Order_Exception"),
+    ("lower", "Lowercase"),
+    ("lowercase", "Lowercase"),
+    ("lowercasemapping", "Lowercase_Mapping"),
+    ("math", "Math"),
+    ("na", "Name"),
+    ("na1", "Unicode_1_Name"),
+    ("name", "Name"),
+    ("namealias", "Name_Alias"),
+    ("nchar", "Noncharacter_Code_Point"),
+    ("nfcqc", "NFC_Quick_Check"),
+    ("nfcquickcheck", "NFC_Quick_Check"),
+    ("nfdqc", "NFD_Quick_Check"),
+    ("nfdquickcheck", "NFD_Quick_Check"),
+    ("nfkccasefold", "NFKC_Casefold"),
+    ("nfkccf", "NFKC_Casefold"),
+    ("nfkcqc", "NFKC_Quick_Check"),
+    ("nfkcquickcheck", "NFKC_Quick_Check"),
+    ("nfkdqc", "NFKD_Quick_Check"),
+    ("nfkdquickcheck", "NFKD_Quick_Check"),
+    ("noncharactercodepoint", "Noncharacter_Code_Point"),
+    ("nt", "Numeric_Type"),
+    ("numerictype", "Numeric_Type"),
+    ("numericvalue", "Numeric_Value"),
+    ("nv", "Numeric_Value"),
+    ("oalpha", "Other_Alphabetic"),
+    ("ocomment", "ISO_Comment"),
+    ("odi", "Other_Default_Ignorable_Code_Point"),
+    ("ogrext", "Other_Grapheme_Extend"),
+    ("oidc", "Other_ID_Continue"),
+    ("oids", "Other_ID_Start"),
+    ("olower", "Other_Lowercase"),
+    ("omath", "Other_Math"),
+    ("otheralphabetic", "Other_Alphabetic"),
+    ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
+    ("othergraphemeextend", "Other_Grapheme_Extend"),
+    ("otheridcontinue", "Other_ID_Continue"),
+    ("otheridstart", "Other_ID_Start"),
+    ("otherlowercase", "Other_Lowercase"),
+    ("othermath", "Other_Math"),
+    ("otheruppercase", "Other_Uppercase"),
+    ("oupper", "Other_Uppercase"),
+    ("patsyn", "Pattern_Syntax"),
+    ("patternsyntax", "Pattern_Syntax"),
+    ("patternwhitespace", "Pattern_White_Space"),
+    ("patws", "Pattern_White_Space"),
+    ("pcm", "Prepended_Concatenation_Mark"),
+    ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
+    ("qmark", "Quotation_Mark"),
+    ("quotationmark", "Quotation_Mark"),
+    ("radical", "Radical"),
+    ("regionalindicator", "Regional_Indicator"),
+    ("ri", "Regional_Indicator"),
+    ("sb", "Sentence_Break"),
+    ("sc", "Script"),
+    ("scf", "Simple_Case_Folding"),
+    ("script", "Script"),
+    ("scriptextensions", "Script_Extensions"),
+    ("scx", "Script_Extensions"),
+    ("sd", "Soft_Dotted"),
+    ("sentencebreak", "Sentence_Break"),
+    ("sentenceterminal", "Sentence_Terminal"),
+    ("sfc", "Simple_Case_Folding"),
+    ("simplecasefolding", "Simple_Case_Folding"),
+    ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
+    ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
+    ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
+    ("slc", "Simple_Lowercase_Mapping"),
+    ("softdotted", "Soft_Dotted"),
+    ("space", "White_Space"),
+    ("stc", "Simple_Titlecase_Mapping"),
+    ("sterm", "Sentence_Terminal"),
+    ("suc", "Simple_Uppercase_Mapping"),
+    ("tc", "Titlecase_Mapping"),
+    ("term", "Terminal_Punctuation"),
+    ("terminalpunctuation", "Terminal_Punctuation"),
+    ("titlecasemapping", "Titlecase_Mapping"),
+    ("uc", "Uppercase_Mapping"),
+    ("uideo", "Unified_Ideograph"),
+    ("unicode1name", "Unicode_1_Name"),
+    ("unicoderadicalstroke", "kRSUnicode"),
+    ("unifiedideograph", "Unified_Ideograph"),
+    ("upper", "Uppercase"),
+    ("uppercase", "Uppercase"),
+    ("uppercasemapping", "Uppercase_Mapping"),
+    ("urs", "kRSUnicode"),
+    ("variationselector", "Variation_Selector"),
+    ("verticalorientation", "Vertical_Orientation"),
+    ("vo", "Vertical_Orientation"),
+    ("vs", "Variation_Selector"),
+    ("wb", "Word_Break"),
+    ("whitespace", "White_Space"),
+    ("wordbreak", "Word_Break"),
+    ("wspace", "White_Space"),
+    ("xidc", "XID_Continue"),
+    ("xidcontinue", "XID_Continue"),
+    ("xids", "XID_Start"),
+    ("xidstart", "XID_Start"),
+    ("xonfc", "Expands_On_NFC"),
+    ("xonfd", "Expands_On_NFD"),
+    ("xonfkc", "Expands_On_NFKC"),
+    ("xonfkd", "Expands_On_NFKD"),
+];
diff --git a/src/unicode_tables/property_values.rs b/src/unicode_tables/property_values.rs
new file mode 100644
index 0000000..bff7d34
--- /dev/null
+++ b/src/unicode_tables/property_values.rs
@@ -0,0 +1,894 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-values ucd-13.0.0 --include gc,script,scx,age,gcb,wb,sb
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const PROPERTY_VALUES: &'static [(
+    &'static str,
+    &'static [(&'static str, &'static str)],
+)] = &[
+    (
+        "Age",
+        &[
+            ("1.1", "V1_1"),
+            ("10.0", "V10_0"),
+            ("11.0", "V11_0"),
+            ("12.0", "V12_0"),
+            ("12.1", "V12_1"),
+            ("13.0", "V13_0"),
+            ("2.0", "V2_0"),
+            ("2.1", "V2_1"),
+            ("3.0", "V3_0"),
+            ("3.1", "V3_1"),
+            ("3.2", "V3_2"),
+            ("4.0", "V4_0"),
+            ("4.1", "V4_1"),
+            ("5.0", "V5_0"),
+            ("5.1", "V5_1"),
+            ("5.2", "V5_2"),
+            ("6.0", "V6_0"),
+            ("6.1", "V6_1"),
+            ("6.2", "V6_2"),
+            ("6.3", "V6_3"),
+            ("7.0", "V7_0"),
+            ("8.0", "V8_0"),
+            ("9.0", "V9_0"),
+            ("na", "Unassigned"),
+            ("unassigned", "Unassigned"),
+            ("v100", "V10_0"),
+            ("v11", "V1_1"),
+            ("v110", "V11_0"),
+            ("v120", "V12_0"),
+            ("v121", "V12_1"),
+            ("v130", "V13_0"),
+            ("v20", "V2_0"),
+            ("v21", "V2_1"),
+            ("v30", "V3_0"),
+            ("v31", "V3_1"),
+            ("v32", "V3_2"),
+            ("v40", "V4_0"),
+            ("v41", "V4_1"),
+            ("v50", "V5_0"),
+            ("v51", "V5_1"),
+            ("v52", "V5_2"),
+            ("v60", "V6_0"),
+            ("v61", "V6_1"),
+            ("v62", "V6_2"),
+            ("v63", "V6_3"),
+            ("v70", "V7_0"),
+            ("v80", "V8_0"),
+            ("v90", "V9_0"),
+        ],
+    ),
+    (
+        "General_Category",
+        &[
+            ("c", "Other"),
+            ("casedletter", "Cased_Letter"),
+            ("cc", "Control"),
+            ("cf", "Format"),
+            ("closepunctuation", "Close_Punctuation"),
+            ("cn", "Unassigned"),
+            ("cntrl", "Control"),
+            ("co", "Private_Use"),
+            ("combiningmark", "Mark"),
+            ("connectorpunctuation", "Connector_Punctuation"),
+            ("control", "Control"),
+            ("cs", "Surrogate"),
+            ("currencysymbol", "Currency_Symbol"),
+            ("dashpunctuation", "Dash_Punctuation"),
+            ("decimalnumber", "Decimal_Number"),
+            ("digit", "Decimal_Number"),
+            ("enclosingmark", "Enclosing_Mark"),
+            ("finalpunctuation", "Final_Punctuation"),
+            ("format", "Format"),
+            ("initialpunctuation", "Initial_Punctuation"),
+            ("l", "Letter"),
+            ("lc", "Cased_Letter"),
+            ("letter", "Letter"),
+            ("letternumber", "Letter_Number"),
+            ("lineseparator", "Line_Separator"),
+            ("ll", "Lowercase_Letter"),
+            ("lm", "Modifier_Letter"),
+            ("lo", "Other_Letter"),
+            ("lowercaseletter", "Lowercase_Letter"),
+            ("lt", "Titlecase_Letter"),
+            ("lu", "Uppercase_Letter"),
+            ("m", "Mark"),
+            ("mark", "Mark"),
+            ("mathsymbol", "Math_Symbol"),
+            ("mc", "Spacing_Mark"),
+            ("me", "Enclosing_Mark"),
+            ("mn", "Nonspacing_Mark"),
+            ("modifierletter", "Modifier_Letter"),
+            ("modifiersymbol", "Modifier_Symbol"),
+            ("n", "Number"),
+            ("nd", "Decimal_Number"),
+            ("nl", "Letter_Number"),
+            ("no", "Other_Number"),
+            ("nonspacingmark", "Nonspacing_Mark"),
+            ("number", "Number"),
+            ("openpunctuation", "Open_Punctuation"),
+            ("other", "Other"),
+            ("otherletter", "Other_Letter"),
+            ("othernumber", "Other_Number"),
+            ("otherpunctuation", "Other_Punctuation"),
+            ("othersymbol", "Other_Symbol"),
+            ("p", "Punctuation"),
+            ("paragraphseparator", "Paragraph_Separator"),
+            ("pc", "Connector_Punctuation"),
+            ("pd", "Dash_Punctuation"),
+            ("pe", "Close_Punctuation"),
+            ("pf", "Final_Punctuation"),
+            ("pi", "Initial_Punctuation"),
+            ("po", "Other_Punctuation"),
+            ("privateuse", "Private_Use"),
+            ("ps", "Open_Punctuation"),
+            ("punct", "Punctuation"),
+            ("punctuation", "Punctuation"),
+            ("s", "Symbol"),
+            ("sc", "Currency_Symbol"),
+            ("separator", "Separator"),
+            ("sk", "Modifier_Symbol"),
+            ("sm", "Math_Symbol"),
+            ("so", "Other_Symbol"),
+            ("spaceseparator", "Space_Separator"),
+            ("spacingmark", "Spacing_Mark"),
+            ("surrogate", "Surrogate"),
+            ("symbol", "Symbol"),
+            ("titlecaseletter", "Titlecase_Letter"),
+            ("unassigned", "Unassigned"),
+            ("uppercaseletter", "Uppercase_Letter"),
+            ("z", "Separator"),
+            ("zl", "Line_Separator"),
+            ("zp", "Paragraph_Separator"),
+            ("zs", "Space_Separator"),
+        ],
+    ),
+    (
+        "Grapheme_Cluster_Break",
+        &[
+            ("cn", "Control"),
+            ("control", "Control"),
+            ("cr", "CR"),
+            ("eb", "E_Base"),
+            ("ebase", "E_Base"),
+            ("ebasegaz", "E_Base_GAZ"),
+            ("ebg", "E_Base_GAZ"),
+            ("em", "E_Modifier"),
+            ("emodifier", "E_Modifier"),
+            ("ex", "Extend"),
+            ("extend", "Extend"),
+            ("gaz", "Glue_After_Zwj"),
+            ("glueafterzwj", "Glue_After_Zwj"),
+            ("l", "L"),
+            ("lf", "LF"),
+            ("lv", "LV"),
+            ("lvt", "LVT"),
+            ("other", "Other"),
+            ("pp", "Prepend"),
+            ("prepend", "Prepend"),
+            ("regionalindicator", "Regional_Indicator"),
+            ("ri", "Regional_Indicator"),
+            ("sm", "SpacingMark"),
+            ("spacingmark", "SpacingMark"),
+            ("t", "T"),
+            ("v", "V"),
+            ("xx", "Other"),
+            ("zwj", "ZWJ"),
+        ],
+    ),
+    (
+        "Script",
+        &[
+            ("adlam", "Adlam"),
+            ("adlm", "Adlam"),
+            ("aghb", "Caucasian_Albanian"),
+            ("ahom", "Ahom"),
+            ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
+            ("arab", "Arabic"),
+            ("arabic", "Arabic"),
+            ("armenian", "Armenian"),
+            ("armi", "Imperial_Aramaic"),
+            ("armn", "Armenian"),
+            ("avestan", "Avestan"),
+            ("avst", "Avestan"),
+            ("bali", "Balinese"),
+            ("balinese", "Balinese"),
+            ("bamu", "Bamum"),
+            ("bamum", "Bamum"),
+            ("bass", "Bassa_Vah"),
+            ("bassavah", "Bassa_Vah"),
+            ("batak", "Batak"),
+            ("batk", "Batak"),
+            ("beng", "Bengali"),
+            ("bengali", "Bengali"),
+            ("bhaiksuki", "Bhaiksuki"),
+            ("bhks", "Bhaiksuki"),
+            ("bopo", "Bopomofo"),
+            ("bopomofo", "Bopomofo"),
+            ("brah", "Brahmi"),
+            ("brahmi", "Brahmi"),
+            ("brai", "Braille"),
+            ("braille", "Braille"),
+            ("bugi", "Buginese"),
+            ("buginese", "Buginese"),
+            ("buhd", "Buhid"),
+            ("buhid", "Buhid"),
+            ("cakm", "Chakma"),
+            ("canadianaboriginal", "Canadian_Aboriginal"),
+            ("cans", "Canadian_Aboriginal"),
+            ("cari", "Carian"),
+            ("carian", "Carian"),
+            ("caucasianalbanian", "Caucasian_Albanian"),
+            ("chakma", "Chakma"),
+            ("cham", "Cham"),
+            ("cher", "Cherokee"),
+            ("cherokee", "Cherokee"),
+            ("chorasmian", "Chorasmian"),
+            ("chrs", "Chorasmian"),
+            ("common", "Common"),
+            ("copt", "Coptic"),
+            ("coptic", "Coptic"),
+            ("cprt", "Cypriot"),
+            ("cuneiform", "Cuneiform"),
+            ("cypriot", "Cypriot"),
+            ("cyrillic", "Cyrillic"),
+            ("cyrl", "Cyrillic"),
+            ("deseret", "Deseret"),
+            ("deva", "Devanagari"),
+            ("devanagari", "Devanagari"),
+            ("diak", "Dives_Akuru"),
+            ("divesakuru", "Dives_Akuru"),
+            ("dogr", "Dogra"),
+            ("dogra", "Dogra"),
+            ("dsrt", "Deseret"),
+            ("dupl", "Duployan"),
+            ("duployan", "Duployan"),
+            ("egyp", "Egyptian_Hieroglyphs"),
+            ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
+            ("elba", "Elbasan"),
+            ("elbasan", "Elbasan"),
+            ("elym", "Elymaic"),
+            ("elymaic", "Elymaic"),
+            ("ethi", "Ethiopic"),
+            ("ethiopic", "Ethiopic"),
+            ("geor", "Georgian"),
+            ("georgian", "Georgian"),
+            ("glag", "Glagolitic"),
+            ("glagolitic", "Glagolitic"),
+            ("gong", "Gunjala_Gondi"),
+            ("gonm", "Masaram_Gondi"),
+            ("goth", "Gothic"),
+            ("gothic", "Gothic"),
+            ("gran", "Grantha"),
+            ("grantha", "Grantha"),
+            ("greek", "Greek"),
+            ("grek", "Greek"),
+            ("gujarati", "Gujarati"),
+            ("gujr", "Gujarati"),
+            ("gunjalagondi", "Gunjala_Gondi"),
+            ("gurmukhi", "Gurmukhi"),
+            ("guru", "Gurmukhi"),
+            ("han", "Han"),
+            ("hang", "Hangul"),
+            ("hangul", "Hangul"),
+            ("hani", "Han"),
+            ("hanifirohingya", "Hanifi_Rohingya"),
+            ("hano", "Hanunoo"),
+            ("hanunoo", "Hanunoo"),
+            ("hatr", "Hatran"),
+            ("hatran", "Hatran"),
+            ("hebr", "Hebrew"),
+            ("hebrew", "Hebrew"),
+            ("hira", "Hiragana"),
+            ("hiragana", "Hiragana"),
+            ("hluw", "Anatolian_Hieroglyphs"),
+            ("hmng", "Pahawh_Hmong"),
+            ("hmnp", "Nyiakeng_Puachue_Hmong"),
+            ("hrkt", "Katakana_Or_Hiragana"),
+            ("hung", "Old_Hungarian"),
+            ("imperialaramaic", "Imperial_Aramaic"),
+            ("inherited", "Inherited"),
+            ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+            ("inscriptionalparthian", "Inscriptional_Parthian"),
+            ("ital", "Old_Italic"),
+            ("java", "Javanese"),
+            ("javanese", "Javanese"),
+            ("kaithi", "Kaithi"),
+            ("kali", "Kayah_Li"),
+            ("kana", "Katakana"),
+            ("kannada", "Kannada"),
+            ("katakana", "Katakana"),
+            ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+            ("kayahli", "Kayah_Li"),
+            ("khar", "Kharoshthi"),
+            ("kharoshthi", "Kharoshthi"),
+            ("khitansmallscript", "Khitan_Small_Script"),
+            ("khmer", "Khmer"),
+            ("khmr", "Khmer"),
+            ("khoj", "Khojki"),
+            ("khojki", "Khojki"),
+            ("khudawadi", "Khudawadi"),
+            ("kits", "Khitan_Small_Script"),
+            ("knda", "Kannada"),
+            ("kthi", "Kaithi"),
+            ("lana", "Tai_Tham"),
+            ("lao", "Lao"),
+            ("laoo", "Lao"),
+            ("latin", "Latin"),
+            ("latn", "Latin"),
+            ("lepc", "Lepcha"),
+            ("lepcha", "Lepcha"),
+            ("limb", "Limbu"),
+            ("limbu", "Limbu"),
+            ("lina", "Linear_A"),
+            ("linb", "Linear_B"),
+            ("lineara", "Linear_A"),
+            ("linearb", "Linear_B"),
+            ("lisu", "Lisu"),
+            ("lyci", "Lycian"),
+            ("lycian", "Lycian"),
+            ("lydi", "Lydian"),
+            ("lydian", "Lydian"),
+            ("mahajani", "Mahajani"),
+            ("mahj", "Mahajani"),
+            ("maka", "Makasar"),
+            ("makasar", "Makasar"),
+            ("malayalam", "Malayalam"),
+            ("mand", "Mandaic"),
+            ("mandaic", "Mandaic"),
+            ("mani", "Manichaean"),
+            ("manichaean", "Manichaean"),
+            ("marc", "Marchen"),
+            ("marchen", "Marchen"),
+            ("masaramgondi", "Masaram_Gondi"),
+            ("medefaidrin", "Medefaidrin"),
+            ("medf", "Medefaidrin"),
+            ("meeteimayek", "Meetei_Mayek"),
+            ("mend", "Mende_Kikakui"),
+            ("mendekikakui", "Mende_Kikakui"),
+            ("merc", "Meroitic_Cursive"),
+            ("mero", "Meroitic_Hieroglyphs"),
+            ("meroiticcursive", "Meroitic_Cursive"),
+            ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
+            ("miao", "Miao"),
+            ("mlym", "Malayalam"),
+            ("modi", "Modi"),
+            ("mong", "Mongolian"),
+            ("mongolian", "Mongolian"),
+            ("mro", "Mro"),
+            ("mroo", "Mro"),
+            ("mtei", "Meetei_Mayek"),
+            ("mult", "Multani"),
+            ("multani", "Multani"),
+            ("myanmar", "Myanmar"),
+            ("mymr", "Myanmar"),
+            ("nabataean", "Nabataean"),
+            ("nand", "Nandinagari"),
+            ("nandinagari", "Nandinagari"),
+            ("narb", "Old_North_Arabian"),
+            ("nbat", "Nabataean"),
+            ("newa", "Newa"),
+            ("newtailue", "New_Tai_Lue"),
+            ("nko", "Nko"),
+            ("nkoo", "Nko"),
+            ("nshu", "Nushu"),
+            ("nushu", "Nushu"),
+            ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
+            ("ogam", "Ogham"),
+            ("ogham", "Ogham"),
+            ("olchiki", "Ol_Chiki"),
+            ("olck", "Ol_Chiki"),
+            ("oldhungarian", "Old_Hungarian"),
+            ("olditalic", "Old_Italic"),
+            ("oldnortharabian", "Old_North_Arabian"),
+            ("oldpermic", "Old_Permic"),
+            ("oldpersian", "Old_Persian"),
+            ("oldsogdian", "Old_Sogdian"),
+            ("oldsoutharabian", "Old_South_Arabian"),
+            ("oldturkic", "Old_Turkic"),
+            ("oriya", "Oriya"),
+            ("orkh", "Old_Turkic"),
+            ("orya", "Oriya"),
+            ("osage", "Osage"),
+            ("osge", "Osage"),
+            ("osma", "Osmanya"),
+            ("osmanya", "Osmanya"),
+            ("pahawhhmong", "Pahawh_Hmong"),
+            ("palm", "Palmyrene"),
+            ("palmyrene", "Palmyrene"),
+            ("pauc", "Pau_Cin_Hau"),
+            ("paucinhau", "Pau_Cin_Hau"),
+            ("perm", "Old_Permic"),
+            ("phag", "Phags_Pa"),
+            ("phagspa", "Phags_Pa"),
+            ("phli", "Inscriptional_Pahlavi"),
+            ("phlp", "Psalter_Pahlavi"),
+            ("phnx", "Phoenician"),
+            ("phoenician", "Phoenician"),
+            ("plrd", "Miao"),
+            ("prti", "Inscriptional_Parthian"),
+            ("psalterpahlavi", "Psalter_Pahlavi"),
+            ("qaac", "Coptic"),
+            ("qaai", "Inherited"),
+            ("rejang", "Rejang"),
+            ("rjng", "Rejang"),
+            ("rohg", "Hanifi_Rohingya"),
+            ("runic", "Runic"),
+            ("runr", "Runic"),
+            ("samaritan", "Samaritan"),
+            ("samr", "Samaritan"),
+            ("sarb", "Old_South_Arabian"),
+            ("saur", "Saurashtra"),
+            ("saurashtra", "Saurashtra"),
+            ("sgnw", "SignWriting"),
+            ("sharada", "Sharada"),
+            ("shavian", "Shavian"),
+            ("shaw", "Shavian"),
+            ("shrd", "Sharada"),
+            ("sidd", "Siddham"),
+            ("siddham", "Siddham"),
+            ("signwriting", "SignWriting"),
+            ("sind", "Khudawadi"),
+            ("sinh", "Sinhala"),
+            ("sinhala", "Sinhala"),
+            ("sogd", "Sogdian"),
+            ("sogdian", "Sogdian"),
+            ("sogo", "Old_Sogdian"),
+            ("sora", "Sora_Sompeng"),
+            ("sorasompeng", "Sora_Sompeng"),
+            ("soyo", "Soyombo"),
+            ("soyombo", "Soyombo"),
+            ("sund", "Sundanese"),
+            ("sundanese", "Sundanese"),
+            ("sylo", "Syloti_Nagri"),
+            ("sylotinagri", "Syloti_Nagri"),
+            ("syrc", "Syriac"),
+            ("syriac", "Syriac"),
+            ("tagalog", "Tagalog"),
+            ("tagb", "Tagbanwa"),
+            ("tagbanwa", "Tagbanwa"),
+            ("taile", "Tai_Le"),
+            ("taitham", "Tai_Tham"),
+            ("taiviet", "Tai_Viet"),
+            ("takr", "Takri"),
+            ("takri", "Takri"),
+            ("tale", "Tai_Le"),
+            ("talu", "New_Tai_Lue"),
+            ("tamil", "Tamil"),
+            ("taml", "Tamil"),
+            ("tang", "Tangut"),
+            ("tangut", "Tangut"),
+            ("tavt", "Tai_Viet"),
+            ("telu", "Telugu"),
+            ("telugu", "Telugu"),
+            ("tfng", "Tifinagh"),
+            ("tglg", "Tagalog"),
+            ("thaa", "Thaana"),
+            ("thaana", "Thaana"),
+            ("thai", "Thai"),
+            ("tibetan", "Tibetan"),
+            ("tibt", "Tibetan"),
+            ("tifinagh", "Tifinagh"),
+            ("tirh", "Tirhuta"),
+            ("tirhuta", "Tirhuta"),
+            ("ugar", "Ugaritic"),
+            ("ugaritic", "Ugaritic"),
+            ("unknown", "Unknown"),
+            ("vai", "Vai"),
+            ("vaii", "Vai"),
+            ("wancho", "Wancho"),
+            ("wara", "Warang_Citi"),
+            ("warangciti", "Warang_Citi"),
+            ("wcho", "Wancho"),
+            ("xpeo", "Old_Persian"),
+            ("xsux", "Cuneiform"),
+            ("yezi", "Yezidi"),
+            ("yezidi", "Yezidi"),
+            ("yi", "Yi"),
+            ("yiii", "Yi"),
+            ("zanabazarsquare", "Zanabazar_Square"),
+            ("zanb", "Zanabazar_Square"),
+            ("zinh", "Inherited"),
+            ("zyyy", "Common"),
+            ("zzzz", "Unknown"),
+        ],
+    ),
+    (
+        "Script_Extensions",
+        &[
+            ("adlam", "Adlam"),
+            ("adlm", "Adlam"),
+            ("aghb", "Caucasian_Albanian"),
+            ("ahom", "Ahom"),
+            ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
+            ("arab", "Arabic"),
+            ("arabic", "Arabic"),
+            ("armenian", "Armenian"),
+            ("armi", "Imperial_Aramaic"),
+            ("armn", "Armenian"),
+            ("avestan", "Avestan"),
+            ("avst", "Avestan"),
+            ("bali", "Balinese"),
+            ("balinese", "Balinese"),
+            ("bamu", "Bamum"),
+            ("bamum", "Bamum"),
+            ("bass", "Bassa_Vah"),
+            ("bassavah", "Bassa_Vah"),
+            ("batak", "Batak"),
+            ("batk", "Batak"),
+            ("beng", "Bengali"),
+            ("bengali", "Bengali"),
+            ("bhaiksuki", "Bhaiksuki"),
+            ("bhks", "Bhaiksuki"),
+            ("bopo", "Bopomofo"),
+            ("bopomofo", "Bopomofo"),
+            ("brah", "Brahmi"),
+            ("brahmi", "Brahmi"),
+            ("brai", "Braille"),
+            ("braille", "Braille"),
+            ("bugi", "Buginese"),
+            ("buginese", "Buginese"),
+            ("buhd", "Buhid"),
+            ("buhid", "Buhid"),
+            ("cakm", "Chakma"),
+            ("canadianaboriginal", "Canadian_Aboriginal"),
+            ("cans", "Canadian_Aboriginal"),
+            ("cari", "Carian"),
+            ("carian", "Carian"),
+            ("caucasianalbanian", "Caucasian_Albanian"),
+            ("chakma", "Chakma"),
+            ("cham", "Cham"),
+            ("cher", "Cherokee"),
+            ("cherokee", "Cherokee"),
+            ("chorasmian", "Chorasmian"),
+            ("chrs", "Chorasmian"),
+            ("common", "Common"),
+            ("copt", "Coptic"),
+            ("coptic", "Coptic"),
+            ("cprt", "Cypriot"),
+            ("cuneiform", "Cuneiform"),
+            ("cypriot", "Cypriot"),
+            ("cyrillic", "Cyrillic"),
+            ("cyrl", "Cyrillic"),
+            ("deseret", "Deseret"),
+            ("deva", "Devanagari"),
+            ("devanagari", "Devanagari"),
+            ("diak", "Dives_Akuru"),
+            ("divesakuru", "Dives_Akuru"),
+            ("dogr", "Dogra"),
+            ("dogra", "Dogra"),
+            ("dsrt", "Deseret"),
+            ("dupl", "Duployan"),
+            ("duployan", "Duployan"),
+            ("egyp", "Egyptian_Hieroglyphs"),
+            ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
+            ("elba", "Elbasan"),
+            ("elbasan", "Elbasan"),
+            ("elym", "Elymaic"),
+            ("elymaic", "Elymaic"),
+            ("ethi", "Ethiopic"),
+            ("ethiopic", "Ethiopic"),
+            ("geor", "Georgian"),
+            ("georgian", "Georgian"),
+            ("glag", "Glagolitic"),
+            ("glagolitic", "Glagolitic"),
+            ("gong", "Gunjala_Gondi"),
+            ("gonm", "Masaram_Gondi"),
+            ("goth", "Gothic"),
+            ("gothic", "Gothic"),
+            ("gran", "Grantha"),
+            ("grantha", "Grantha"),
+            ("greek", "Greek"),
+            ("grek", "Greek"),
+            ("gujarati", "Gujarati"),
+            ("gujr", "Gujarati"),
+            ("gunjalagondi", "Gunjala_Gondi"),
+            ("gurmukhi", "Gurmukhi"),
+            ("guru", "Gurmukhi"),
+            ("han", "Han"),
+            ("hang", "Hangul"),
+            ("hangul", "Hangul"),
+            ("hani", "Han"),
+            ("hanifirohingya", "Hanifi_Rohingya"),
+            ("hano", "Hanunoo"),
+            ("hanunoo", "Hanunoo"),
+            ("hatr", "Hatran"),
+            ("hatran", "Hatran"),
+            ("hebr", "Hebrew"),
+            ("hebrew", "Hebrew"),
+            ("hira", "Hiragana"),
+            ("hiragana", "Hiragana"),
+            ("hluw", "Anatolian_Hieroglyphs"),
+            ("hmng", "Pahawh_Hmong"),
+            ("hmnp", "Nyiakeng_Puachue_Hmong"),
+            ("hrkt", "Katakana_Or_Hiragana"),
+            ("hung", "Old_Hungarian"),
+            ("imperialaramaic", "Imperial_Aramaic"),
+            ("inherited", "Inherited"),
+            ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+            ("inscriptionalparthian", "Inscriptional_Parthian"),
+            ("ital", "Old_Italic"),
+            ("java", "Javanese"),
+            ("javanese", "Javanese"),
+            ("kaithi", "Kaithi"),
+            ("kali", "Kayah_Li"),
+            ("kana", "Katakana"),
+            ("kannada", "Kannada"),
+            ("katakana", "Katakana"),
+            ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+            ("kayahli", "Kayah_Li"),
+            ("khar", "Kharoshthi"),
+            ("kharoshthi", "Kharoshthi"),
+            ("khitansmallscript", "Khitan_Small_Script"),
+            ("khmer", "Khmer"),
+            ("khmr", "Khmer"),
+            ("khoj", "Khojki"),
+            ("khojki", "Khojki"),
+            ("khudawadi", "Khudawadi"),
+            ("kits", "Khitan_Small_Script"),
+            ("knda", "Kannada"),
+            ("kthi", "Kaithi"),
+            ("lana", "Tai_Tham"),
+            ("lao", "Lao"),
+            ("laoo", "Lao"),
+            ("latin", "Latin"),
+            ("latn", "Latin"),
+            ("lepc", "Lepcha"),
+            ("lepcha", "Lepcha"),
+            ("limb", "Limbu"),
+            ("limbu", "Limbu"),
+            ("lina", "Linear_A"),
+            ("linb", "Linear_B"),
+            ("lineara", "Linear_A"),
+            ("linearb", "Linear_B"),
+            ("lisu", "Lisu"),
+            ("lyci", "Lycian"),
+            ("lycian", "Lycian"),
+            ("lydi", "Lydian"),
+            ("lydian", "Lydian"),
+            ("mahajani", "Mahajani"),
+            ("mahj", "Mahajani"),
+            ("maka", "Makasar"),
+            ("makasar", "Makasar"),
+            ("malayalam", "Malayalam"),
+            ("mand", "Mandaic"),
+            ("mandaic", "Mandaic"),
+            ("mani", "Manichaean"),
+            ("manichaean", "Manichaean"),
+            ("marc", "Marchen"),
+            ("marchen", "Marchen"),
+            ("masaramgondi", "Masaram_Gondi"),
+            ("medefaidrin", "Medefaidrin"),
+            ("medf", "Medefaidrin"),
+            ("meeteimayek", "Meetei_Mayek"),
+            ("mend", "Mende_Kikakui"),
+            ("mendekikakui", "Mende_Kikakui"),
+            ("merc", "Meroitic_Cursive"),
+            ("mero", "Meroitic_Hieroglyphs"),
+            ("meroiticcursive", "Meroitic_Cursive"),
+            ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
+            ("miao", "Miao"),
+            ("mlym", "Malayalam"),
+            ("modi", "Modi"),
+            ("mong", "Mongolian"),
+            ("mongolian", "Mongolian"),
+            ("mro", "Mro"),
+            ("mroo", "Mro"),
+            ("mtei", "Meetei_Mayek"),
+            ("mult", "Multani"),
+            ("multani", "Multani"),
+            ("myanmar", "Myanmar"),
+            ("mymr", "Myanmar"),
+            ("nabataean", "Nabataean"),
+            ("nand", "Nandinagari"),
+            ("nandinagari", "Nandinagari"),
+            ("narb", "Old_North_Arabian"),
+            ("nbat", "Nabataean"),
+            ("newa", "Newa"),
+            ("newtailue", "New_Tai_Lue"),
+            ("nko", "Nko"),
+            ("nkoo", "Nko"),
+            ("nshu", "Nushu"),
+            ("nushu", "Nushu"),
+            ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
+            ("ogam", "Ogham"),
+            ("ogham", "Ogham"),
+            ("olchiki", "Ol_Chiki"),
+            ("olck", "Ol_Chiki"),
+            ("oldhungarian", "Old_Hungarian"),
+            ("olditalic", "Old_Italic"),
+            ("oldnortharabian", "Old_North_Arabian"),
+            ("oldpermic", "Old_Permic"),
+            ("oldpersian", "Old_Persian"),
+            ("oldsogdian", "Old_Sogdian"),
+            ("oldsoutharabian", "Old_South_Arabian"),
+            ("oldturkic", "Old_Turkic"),
+            ("oriya", "Oriya"),
+            ("orkh", "Old_Turkic"),
+            ("orya", "Oriya"),
+            ("osage", "Osage"),
+            ("osge", "Osage"),
+            ("osma", "Osmanya"),
+            ("osmanya", "Osmanya"),
+            ("pahawhhmong", "Pahawh_Hmong"),
+            ("palm", "Palmyrene"),
+            ("palmyrene", "Palmyrene"),
+            ("pauc", "Pau_Cin_Hau"),
+            ("paucinhau", "Pau_Cin_Hau"),
+            ("perm", "Old_Permic"),
+            ("phag", "Phags_Pa"),
+            ("phagspa", "Phags_Pa"),
+            ("phli", "Inscriptional_Pahlavi"),
+            ("phlp", "Psalter_Pahlavi"),
+            ("phnx", "Phoenician"),
+            ("phoenician", "Phoenician"),
+            ("plrd", "Miao"),
+            ("prti", "Inscriptional_Parthian"),
+            ("psalterpahlavi", "Psalter_Pahlavi"),
+            ("qaac", "Coptic"),
+            ("qaai", "Inherited"),
+            ("rejang", "Rejang"),
+            ("rjng", "Rejang"),
+            ("rohg", "Hanifi_Rohingya"),
+            ("runic", "Runic"),
+            ("runr", "Runic"),
+            ("samaritan", "Samaritan"),
+            ("samr", "Samaritan"),
+            ("sarb", "Old_South_Arabian"),
+            ("saur", "Saurashtra"),
+            ("saurashtra", "Saurashtra"),
+            ("sgnw", "SignWriting"),
+            ("sharada", "Sharada"),
+            ("shavian", "Shavian"),
+            ("shaw", "Shavian"),
+            ("shrd", "Sharada"),
+            ("sidd", "Siddham"),
+            ("siddham", "Siddham"),
+            ("signwriting", "SignWriting"),
+            ("sind", "Khudawadi"),
+            ("sinh", "Sinhala"),
+            ("sinhala", "Sinhala"),
+            ("sogd", "Sogdian"),
+            ("sogdian", "Sogdian"),
+            ("sogo", "Old_Sogdian"),
+            ("sora", "Sora_Sompeng"),
+            ("sorasompeng", "Sora_Sompeng"),
+            ("soyo", "Soyombo"),
+            ("soyombo", "Soyombo"),
+            ("sund", "Sundanese"),
+            ("sundanese", "Sundanese"),
+            ("sylo", "Syloti_Nagri"),
+            ("sylotinagri", "Syloti_Nagri"),
+            ("syrc", "Syriac"),
+            ("syriac", "Syriac"),
+            ("tagalog", "Tagalog"),
+            ("tagb", "Tagbanwa"),
+            ("tagbanwa", "Tagbanwa"),
+            ("taile", "Tai_Le"),
+            ("taitham", "Tai_Tham"),
+            ("taiviet", "Tai_Viet"),
+            ("takr", "Takri"),
+            ("takri", "Takri"),
+            ("tale", "Tai_Le"),
+            ("talu", "New_Tai_Lue"),
+            ("tamil", "Tamil"),
+            ("taml", "Tamil"),
+            ("tang", "Tangut"),
+            ("tangut", "Tangut"),
+            ("tavt", "Tai_Viet"),
+            ("telu", "Telugu"),
+            ("telugu", "Telugu"),
+            ("tfng", "Tifinagh"),
+            ("tglg", "Tagalog"),
+            ("thaa", "Thaana"),
+            ("thaana", "Thaana"),
+            ("thai", "Thai"),
+            ("tibetan", "Tibetan"),
+            ("tibt", "Tibetan"),
+            ("tifinagh", "Tifinagh"),
+            ("tirh", "Tirhuta"),
+            ("tirhuta", "Tirhuta"),
+            ("ugar", "Ugaritic"),
+            ("ugaritic", "Ugaritic"),
+            ("unknown", "Unknown"),
+            ("vai", "Vai"),
+            ("vaii", "Vai"),
+            ("wancho", "Wancho"),
+            ("wara", "Warang_Citi"),
+            ("warangciti", "Warang_Citi"),
+            ("wcho", "Wancho"),
+            ("xpeo", "Old_Persian"),
+            ("xsux", "Cuneiform"),
+            ("yezi", "Yezidi"),
+            ("yezidi", "Yezidi"),
+            ("yi", "Yi"),
+            ("yiii", "Yi"),
+            ("zanabazarsquare", "Zanabazar_Square"),
+            ("zanb", "Zanabazar_Square"),
+            ("zinh", "Inherited"),
+            ("zyyy", "Common"),
+            ("zzzz", "Unknown"),
+        ],
+    ),
+    (
+        "Sentence_Break",
+        &[
+            ("at", "ATerm"),
+            ("aterm", "ATerm"),
+            ("cl", "Close"),
+            ("close", "Close"),
+            ("cr", "CR"),
+            ("ex", "Extend"),
+            ("extend", "Extend"),
+            ("fo", "Format"),
+            ("format", "Format"),
+            ("le", "OLetter"),
+            ("lf", "LF"),
+            ("lo", "Lower"),
+            ("lower", "Lower"),
+            ("nu", "Numeric"),
+            ("numeric", "Numeric"),
+            ("oletter", "OLetter"),
+            ("other", "Other"),
+            ("sc", "SContinue"),
+            ("scontinue", "SContinue"),
+            ("se", "Sep"),
+            ("sep", "Sep"),
+            ("sp", "Sp"),
+            ("st", "STerm"),
+            ("sterm", "STerm"),
+            ("up", "Upper"),
+            ("upper", "Upper"),
+            ("xx", "Other"),
+        ],
+    ),
+    (
+        "Word_Break",
+        &[
+            ("aletter", "ALetter"),
+            ("cr", "CR"),
+            ("doublequote", "Double_Quote"),
+            ("dq", "Double_Quote"),
+            ("eb", "E_Base"),
+            ("ebase", "E_Base"),
+            ("ebasegaz", "E_Base_GAZ"),
+            ("ebg", "E_Base_GAZ"),
+            ("em", "E_Modifier"),
+            ("emodifier", "E_Modifier"),
+            ("ex", "ExtendNumLet"),
+            ("extend", "Extend"),
+            ("extendnumlet", "ExtendNumLet"),
+            ("fo", "Format"),
+            ("format", "Format"),
+            ("gaz", "Glue_After_Zwj"),
+            ("glueafterzwj", "Glue_After_Zwj"),
+            ("hebrewletter", "Hebrew_Letter"),
+            ("hl", "Hebrew_Letter"),
+            ("ka", "Katakana"),
+            ("katakana", "Katakana"),
+            ("le", "ALetter"),
+            ("lf", "LF"),
+            ("mb", "MidNumLet"),
+            ("midletter", "MidLetter"),
+            ("midnum", "MidNum"),
+            ("midnumlet", "MidNumLet"),
+            ("ml", "MidLetter"),
+            ("mn", "MidNum"),
+            ("newline", "Newline"),
+            ("nl", "Newline"),
+            ("nu", "Numeric"),
+            ("numeric", "Numeric"),
+            ("other", "Other"),
+            ("regionalindicator", "Regional_Indicator"),
+            ("ri", "Regional_Indicator"),
+            ("singlequote", "Single_Quote"),
+            ("sq", "Single_Quote"),
+            ("wsegspace", "WSegSpace"),
+            ("xx", "Other"),
+            ("zwj", "ZWJ"),
+        ],
+    ),
+];
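
Editor's note (illustrative only, not part of this patch): the generated property-value tables above pair a normalized alias (lowercased, with underscores, hyphens, and spaces removed) with the canonical Unicode value name, and each slice is sorted by the alias, so a plain binary search is enough to resolve a name. The sketch below shows that lookup under those assumptions; `GC_ALIASES`, `normalize`, and `canonical` are hypothetical stand-ins, not APIs of regex-syntax.

    // Minimal sketch, assuming a table in the same (alias, canonical) shape
    // as the generated data above. Not part of this patch or of regex-syntax.

    /// Hypothetical excerpt, sorted by the normalized alias.
    const GC_ALIASES: &[(&str, &str)] = &[
        ("decimalnumber", "Decimal_Number"),
        ("l", "Letter"),
        ("letter", "Letter"),
        ("lu", "Uppercase_Letter"),
        ("nd", "Decimal_Number"),
    ];

    /// Normalize a value name the way the tables are keyed (hypothetical helper).
    fn normalize(name: &str) -> String {
        name.chars()
            .filter(|c| !matches!(c, '_' | '-' | ' '))
            .flat_map(|c| c.to_lowercase())
            .collect()
    }

    /// Resolve an alias to its canonical value name via binary search.
    fn canonical<'a>(table: &[(&'a str, &'a str)], alias: &str) -> Option<&'a str> {
        let needle = normalize(alias);
        table
            .binary_search_by_key(&needle.as_str(), |&(k, _)| k)
            .ok()
            .map(|i| table[i].1)
    }

    fn main() {
        assert_eq!(canonical(GC_ALIASES, "Lu"), Some("Uppercase_Letter"));
        assert_eq!(canonical(GC_ALIASES, "Decimal-Number"), Some("Decimal_Number"));
        assert_eq!(canonical(GC_ALIASES, "no_such"), None);
    }
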
diff --git a/src/unicode_tables/script.rs b/src/unicode_tables/script.rs
new file mode 100644
index 0000000..a48203e
--- /dev/null
+++ b/src/unicode_tables/script.rs
@@ -0,0 +1,1216 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("Adlam", ADLAM),
+    ("Ahom", AHOM),
+    ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
+    ("Arabic", ARABIC),
+    ("Armenian", ARMENIAN),
+    ("Avestan", AVESTAN),
+    ("Balinese", BALINESE),
+    ("Bamum", BAMUM),
+    ("Bassa_Vah", BASSA_VAH),
+    ("Batak", BATAK),
+    ("Bengali", BENGALI),
+    ("Bhaiksuki", BHAIKSUKI),
+    ("Bopomofo", BOPOMOFO),
+    ("Brahmi", BRAHMI),
+    ("Braille", BRAILLE),
+    ("Buginese", BUGINESE),
+    ("Buhid", BUHID),
+    ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+    ("Carian", CARIAN),
+    ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+    ("Chakma", CHAKMA),
+    ("Cham", CHAM),
+    ("Cherokee", CHEROKEE),
+    ("Chorasmian", CHORASMIAN),
+    ("Common", COMMON),
+    ("Coptic", COPTIC),
+    ("Cuneiform", CUNEIFORM),
+    ("Cypriot", CYPRIOT),
+    ("Cyrillic", CYRILLIC),
+    ("Deseret", DESERET),
+    ("Devanagari", DEVANAGARI),
+    ("Dives_Akuru", DIVES_AKURU),
+    ("Dogra", DOGRA),
+    ("Duployan", DUPLOYAN),
+    ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
+    ("Elbasan", ELBASAN),
+    ("Elymaic", ELYMAIC),
+    ("Ethiopic", ETHIOPIC),
+    ("Georgian", GEORGIAN),
+    ("Glagolitic", GLAGOLITIC),
+    ("Gothic", GOTHIC),
+    ("Grantha", GRANTHA),
+    ("Greek", GREEK),
+    ("Gujarati", GUJARATI),
+    ("Gunjala_Gondi", GUNJALA_GONDI),
+    ("Gurmukhi", GURMUKHI),
+    ("Han", HAN),
+    ("Hangul", HANGUL),
+    ("Hanifi_Rohingya", HANIFI_ROHINGYA),
+    ("Hanunoo", HANUNOO),
+    ("Hatran", HATRAN),
+    ("Hebrew", HEBREW),
+    ("Hiragana", HIRAGANA),
+    ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
+    ("Inherited", INHERITED),
+    ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+    ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
+    ("Javanese", JAVANESE),
+    ("Kaithi", KAITHI),
+    ("Kannada", KANNADA),
+    ("Katakana", KATAKANA),
+    ("Kayah_Li", KAYAH_LI),
+    ("Kharoshthi", KHAROSHTHI),
+    ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
+    ("Khmer", KHMER),
+    ("Khojki", KHOJKI),
+    ("Khudawadi", KHUDAWADI),
+    ("Lao", LAO),
+    ("Latin", LATIN),
+    ("Lepcha", LEPCHA),
+    ("Limbu", LIMBU),
+    ("Linear_A", LINEAR_A),
+    ("Linear_B", LINEAR_B),
+    ("Lisu", LISU),
+    ("Lycian", LYCIAN),
+    ("Lydian", LYDIAN),
+    ("Mahajani", MAHAJANI),
+    ("Makasar", MAKASAR),
+    ("Malayalam", MALAYALAM),
+    ("Mandaic", MANDAIC),
+    ("Manichaean", MANICHAEAN),
+    ("Marchen", MARCHEN),
+    ("Masaram_Gondi", MASARAM_GONDI),
+    ("Medefaidrin", MEDEFAIDRIN),
+    ("Meetei_Mayek", MEETEI_MAYEK),
+    ("Mende_Kikakui", MENDE_KIKAKUI),
+    ("Meroitic_Cursive", MEROITIC_CURSIVE),
+    ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
+    ("Miao", MIAO),
+    ("Modi", MODI),
+    ("Mongolian", MONGOLIAN),
+    ("Mro", MRO),
+    ("Multani", MULTANI),
+    ("Myanmar", MYANMAR),
+    ("Nabataean", NABATAEAN),
+    ("Nandinagari", NANDINAGARI),
+    ("New_Tai_Lue", NEW_TAI_LUE),
+    ("Newa", NEWA),
+    ("Nko", NKO),
+    ("Nushu", NUSHU),
+    ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
+    ("Ogham", OGHAM),
+    ("Ol_Chiki", OL_CHIKI),
+    ("Old_Hungarian", OLD_HUNGARIAN),
+    ("Old_Italic", OLD_ITALIC),
+    ("Old_North_Arabian", OLD_NORTH_ARABIAN),
+    ("Old_Permic", OLD_PERMIC),
+    ("Old_Persian", OLD_PERSIAN),
+    ("Old_Sogdian", OLD_SOGDIAN),
+    ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+    ("Old_Turkic", OLD_TURKIC),
+    ("Oriya", ORIYA),
+    ("Osage", OSAGE),
+    ("Osmanya", OSMANYA),
+    ("Pahawh_Hmong", PAHAWH_HMONG),
+    ("Palmyrene", PALMYRENE),
+    ("Pau_Cin_Hau", PAU_CIN_HAU),
+    ("Phags_Pa", PHAGS_PA),
+    ("Phoenician", PHOENICIAN),
+    ("Psalter_Pahlavi", PSALTER_PAHLAVI),
+    ("Rejang", REJANG),
+    ("Runic", RUNIC),
+    ("Samaritan", SAMARITAN),
+    ("Saurashtra", SAURASHTRA),
+    ("Sharada", SHARADA),
+    ("Shavian", SHAVIAN),
+    ("Siddham", SIDDHAM),
+    ("SignWriting", SIGNWRITING),
+    ("Sinhala", SINHALA),
+    ("Sogdian", SOGDIAN),
+    ("Sora_Sompeng", SORA_SOMPENG),
+    ("Soyombo", SOYOMBO),
+    ("Sundanese", SUNDANESE),
+    ("Syloti_Nagri", SYLOTI_NAGRI),
+    ("Syriac", SYRIAC),
+    ("Tagalog", TAGALOG),
+    ("Tagbanwa", TAGBANWA),
+    ("Tai_Le", TAI_LE),
+    ("Tai_Tham", TAI_THAM),
+    ("Tai_Viet", TAI_VIET),
+    ("Takri", TAKRI),
+    ("Tamil", TAMIL),
+    ("Tangut", TANGUT),
+    ("Telugu", TELUGU),
+    ("Thaana", THAANA),
+    ("Thai", THAI),
+    ("Tibetan", TIBETAN),
+    ("Tifinagh", TIFINAGH),
+    ("Tirhuta", TIRHUTA),
+    ("Ugaritic", UGARITIC),
+    ("Vai", VAI),
+    ("Wancho", WANCHO),
+    ("Warang_Citi", WARANG_CITI),
+    ("Yezidi", YEZIDI),
+    ("Yi", YI),
+    ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] =
+    &[('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
+
+pub const AHOM: &'static [(char, char)] =
+    &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
+
+pub const ARABIC: &'static [(char, char)] = &[
+    ('\u{600}', '\u{604}'),
+    ('؆', '؋'),
+    ('؍', '\u{61a}'),
+    ('\u{61c}', '\u{61c}'),
+    ('؞', '؞'),
+    ('ؠ', 'ؿ'),
+    ('ف', 'ي'),
+    ('\u{656}', 'ٯ'),
+    ('ٱ', '\u{6dc}'),
+    ('۞', 'ۿ'),
+    ('ݐ', 'ݿ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{8ff}'),
+    ('ﭐ', '﯁'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', '﷽'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('𐹠', '𐹾'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] =
+    &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
+
+pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')];
+
+pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
+
+pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
+
+pub const BASSA_VAH: &'static [(char, char)] =
+    &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')];
+
+pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')];
+
+pub const BENGALI: &'static [(char, char)] = &[
+    ('ঀ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', '\u{9fe}'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] =
+    &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
+
+pub const BOPOMOFO: &'static [(char, char)] =
+    &[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', '\u{31bf}')];
+
+pub const BRAHMI: &'static [(char, char)] =
+    &[('𑀀', '𑁍'), ('𑁒', '𑁯'), ('\u{1107f}', '\u{1107f}')];
+
+pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')];
+
+pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')];
+
+pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
+    &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ')];
+
+pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
+    &[('𐔰', '𐕣'), ('𐕯', '𐕯')];
+
+pub const CHAKMA: &'static [(char, char)] =
+    &[('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')];
+
+pub const CHAM: &'static [(char, char)] =
+    &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
+
+pub const CHEROKEE: &'static [(char, char)] =
+    &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')];
+
+pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
+
+pub const COMMON: &'static [(char, char)] = &[
+    ('\u{0}', '@'),
+    ('[', '`'),
+    ('{', '©'),
+    ('«', '¹'),
+    ('»', '¿'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('ʹ', '˟'),
+    ('˥', '˩'),
+    ('ˬ', '˿'),
+    ('ʹ', 'ʹ'),
+    (';', ';'),
+    ('΅', '΅'),
+    ('·', '·'),
+    ('\u{605}', '\u{605}'),
+    ('،', '،'),
+    ('؛', '؛'),
+    ('؟', '؟'),
+    ('ـ', 'ـ'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('।', '॥'),
+    ('฿', '฿'),
+    ('࿕', '࿘'),
+    ('჻', '჻'),
+    ('᛫', '᛭'),
+    ('᜵', '᜶'),
+    ('᠂', '᠃'),
+    ('᠅', '᠅'),
+    ('᳓', '᳓'),
+    ('᳡', '᳡'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', '᳷'),
+    ('ᳺ', 'ᳺ'),
+    ('\u{2000}', '\u{200b}'),
+    ('\u{200e}', '\u{2064}'),
+    ('\u{2066}', '⁰'),
+    ('⁴', '⁾'),
+    ('₀', '₎'),
+    ('₠', '₿'),
+    ('℀', '℥'),
+    ('℧', '℩'),
+    ('ℬ', 'ℱ'),
+    ('ℳ', '⅍'),
+    ('⅏', '⅟'),
+    ('↉', '↋'),
+    ('←', '␦'),
+    ('⑀', '⑊'),
+    ('①', '⟿'),
+    ('⤀', '⭳'),
+    ('⭶', '⮕'),
+    ('\u{2b97}', '⯿'),
+    ('⸀', '\u{2e52}'),
+    ('⿰', '⿻'),
+    ('\u{3000}', '〄'),
+    ('〆', '〆'),
+    ('〈', '〠'),
+    ('〰', '〷'),
+    ('〼', '〿'),
+    ('゛', '゜'),
+    ('゠', '゠'),
+    ('・', 'ー'),
+    ('㆐', '㆟'),
+    ('㇀', '㇣'),
+    ('㈠', '㉟'),
+    ('㉿', '㋏'),
+    ('㋿', '㋿'),
+    ('㍘', '㏿'),
+    ('䷀', '䷿'),
+    ('꜀', '꜡'),
+    ('ꞈ', '꞊'),
+    ('꠰', '꠹'),
+    ('꤮', '꤮'),
+    ('ꧏ', 'ꧏ'),
+    ('꭛', '꭛'),
+    ('\u{ab6a}', '\u{ab6b}'),
+    ('﴾', '﴿'),
+    ('︐', '︙'),
+    ('︰', '﹒'),
+    ('﹔', '﹦'),
+    ('﹨', '﹫'),
+    ('\u{feff}', '\u{feff}'),
+    ('!', '@'),
+    ('[', '`'),
+    ('{', '・'),
+    ('ー', 'ー'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('¢', '₩'),
+    ('│', '○'),
+    ('\u{fff9}', '�'),
+    ('𐄀', '𐄂'),
+    ('𐄇', '𐄳'),
+    ('𐄷', '𐄿'),
+    ('𐆐', '\u{1019c}'),
+    ('𐇐', '𐇼'),
+    ('𐋡', '𐋻'),
+    ('𖿢', '𖿣'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄩', '𝅦'),
+    ('𝅪', '\u{1d17a}'),
+    ('𝆃', '𝆄'),
+    ('𝆌', '𝆩'),
+    ('𝆮', '𝇨'),
+    ('𝋠', '𝋳'),
+    ('𝌀', '𝍖'),
+    ('𝍠', '𝍸'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('𞱱', '𞲴'),
+    ('𞴁', '𞴽'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+    ('🂠', '🂮'),
+    ('🂱', '🂿'),
+    ('🃁', '🃏'),
+    ('🃑', '🃵'),
+    ('🄀', '\u{1f1ad}'),
+    ('🇦', '🇿'),
+    ('🈁', '🈂'),
+    ('🈐', '🈻'),
+    ('🉀', '🉈'),
+    ('🉐', '🉑'),
+    ('🉠', '🉥'),
+    ('🌀', '\u{1f6d7}'),
+    ('🛠', '🛬'),
+    ('🛰', '\u{1f6fc}'),
+    ('🜀', '🝳'),
+    ('🞀', '🟘'),
+    ('🟠', '🟫'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('🤀', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🩓'),
+    ('🩠', '🩭'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] =
+    &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿')];
+
+pub const CUNEIFORM: &'static [(char, char)] =
+    &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
+
+pub const CYPRIOT: &'static [(char, char)] =
+    &[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿')];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+    ('Ѐ', '\u{484}'),
+    ('\u{487}', 'ԯ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᴫ', 'ᴫ'),
+    ('ᵸ', 'ᵸ'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('Ꙁ', '\u{a69f}'),
+    ('\u{fe2e}', '\u{fe2f}'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+    ('\u{900}', 'ॐ'),
+    ('\u{955}', '\u{963}'),
+    ('०', 'ॿ'),
+    ('\u{a8e0}', '\u{a8ff}'),
+];
+
+pub const DIVES_AKURU: &'static [(char, char)] = &[
+    ('\u{11900}', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11946}'),
+    ('\u{11950}', '\u{11959}'),
+];
+
+pub const DOGRA: &'static [(char, char)] = &[('𑠀', '𑠻')];
+
+pub const DUPLOYAN: &'static [(char, char)] =
+    &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
+    &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
+
+pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
+
+pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+    ('ሀ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('\u{135d}', '፼'),
+    ('ᎀ', '᎙'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ჿ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133c}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+    ('Ͱ', 'ͳ'),
+    ('͵', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('΄', '΄'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϡ'),
+    ('ϰ', 'Ͽ'),
+    ('ᴦ', 'ᴪ'),
+    ('ᵝ', 'ᵡ'),
+    ('ᵦ', 'ᵪ'),
+    ('ᶿ', 'ᶿ'),
+    ('ἀ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ῄ'),
+    ('ῆ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('῝', '`'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', '῾'),
+    ('Ω', 'Ω'),
+    ('ꭥ', 'ꭥ'),
+    ('𐅀', '𐆎'),
+    ('𐆠', '𐆠'),
+    ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('૦', '૱'),
+    ('ૹ', '\u{aff}'),
+];
+
+pub const GUNJALA_GONDI: &'static [(char, char)] = &[
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '੶'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('々', '々'),
+    ('〇', '〇'),
+    ('〡', '〩'),
+    ('〸', '〻'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+    ('ᄀ', 'ᇿ'),
+    ('\u{302e}', '\u{302f}'),
+    ('ㄱ', 'ㆎ'),
+    ('㈀', '㈞'),
+    ('㉠', '㉾'),
+    ('ꥠ', 'ꥼ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+];
+
+pub const HANIFI_ROHINGYA: &'static [(char, char)] =
+    &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')];
+
+pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')];
+
+pub const HATRAN: &'static [(char, char)] =
+    &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')];
+
+pub const HEBREW: &'static [(char, char)] = &[
+    ('\u{591}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', '״'),
+    ('יִ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﭏ'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] =
+    &[('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('𛅐', '𛅒'), ('🈀', '🈀')];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
+    &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
+
+pub const INHERITED: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{485}', '\u{486}'),
+    ('\u{64b}', '\u{655}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{951}', '\u{954}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce0}'),
+    ('\u{1ce2}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200d}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{302a}', '\u{302d}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2d}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{1133b}', '\u{1133b}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
+    &[('𐭠', '𐭲'), ('𐭸', '𐭿')];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
+    &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
+
+pub const JAVANESE: &'static [(char, char)] =
+    &[('\u{a980}', '꧍'), ('꧐', '꧙'), ('꧞', '꧟')];
+
+pub const KAITHI: &'static [(char, char)] =
+    &[('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')];
+
+pub const KANNADA: &'static [(char, char)] = &[
+    ('ಀ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('\u{cbc}', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+    ('ァ', 'ヺ'),
+    ('ヽ', 'ヿ'),
+    ('ㇰ', 'ㇿ'),
+    ('㋐', '㋾'),
+    ('㌀', '㍗'),
+    ('ヲ', 'ッ'),
+    ('ア', 'ン'),
+    ('𛀀', '𛀀'),
+    ('𛅤', '𛅧'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '𐩈'),
+    ('𐩐', '𐩘'),
+];
+
+pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
+    &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
+
+pub const KHMER: &'static [(char, char)] =
+    &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
+
+pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
+
+pub const KHUDAWADI: &'static [(char, char)] =
+    &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
+
+pub const LAO: &'static [(char, char)] = &[
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ʸ'),
+    ('ˠ', 'ˤ'),
+    ('ᴀ', 'ᴥ'),
+    ('ᴬ', 'ᵜ'),
+    ('ᵢ', 'ᵥ'),
+    ('ᵫ', 'ᵷ'),
+    ('ᵹ', 'ᶾ'),
+    ('Ḁ', 'ỿ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('K', 'Å'),
+    ('Ⅎ', 'Ⅎ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⱡ', 'Ɀ'),
+    ('Ꜣ', 'ꞇ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꟿ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', 'ꭤ'),
+    ('ꭦ', '\u{ab69}'),
+    ('ff', 'st'),
+    ('A', 'Z'),
+    ('a', 'z'),
+];
+
+pub const LEPCHA: &'static [(char, char)] =
+    &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')];
+
+pub const LIMBU: &'static [(char, char)] = &[
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥀', '᥀'),
+    ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] =
+    &[('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+];
+
+pub const LISU: &'static [(char, char)] =
+    &[('ꓐ', '꓿'), ('\u{11fb0}', '\u{11fb0}')];
+
+pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
+
+pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')];
+
+pub const MAHAJANI: &'static [(char, char)] = &[('𑅐', '𑅶')];
+
+pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '൏'),
+    ('ൔ', '\u{d63}'),
+    ('൦', 'ൿ'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[('ࡀ', '\u{85b}'), ('࡞', '࡞')];
+
+pub const MANICHAEAN: &'static [(char, char)] =
+    &[('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')];
+
+pub const MARCHEN: &'static [(char, char)] =
+    &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+];
+
+pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] =
+    &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] =
+    &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] =
+    &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')];
+
+pub const MIAO: &'static [(char, char)] =
+    &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
+
+pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+    ('᠀', '᠁'),
+    ('᠄', '᠄'),
+    ('᠆', '\u{180e}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
+
+pub const MULTANI: &'static [(char, char)] =
+    &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
+
+pub const MYANMAR: &'static [(char, char)] =
+    &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')];
+
+pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
+
+pub const NANDINAGARI: &'static [(char, char)] =
+    &[('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧤')];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] =
+    &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
+
+pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
+
+pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')];
+
+pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
+
+pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
+    &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
+
+pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] =
+    &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[('𐍐', '\u{1037a}')];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
+
+pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')];
+
+pub const ORIYA: &'static [(char, char)] = &[
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('\u{b3c}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('୦', '୷'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')];
+
+pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] =
+    &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
+
+pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[('ꡀ', '꡷')];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] =
+    &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
+
+pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')];
+
+pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')];
+
+pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
+
+pub const SAURASHTRA: &'static [(char, char)] =
+    &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
+
+pub const SHARADA: &'static [(char, char)] = &[('\u{11180}', '𑇟')];
+
+pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
+
+pub const SIDDHAM: &'static [(char, char)] =
+    &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
+
+pub const SIGNWRITING: &'static [(char, char)] =
+    &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
+
+pub const SINHALA: &'static [(char, char)] = &[
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('෦', '෯'),
+    ('ෲ', '෴'),
+    ('𑇡', '𑇴'),
+];
+
+pub const SOGDIAN: &'static [(char, char)] = &[('𐼰', '𐽙')];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
+
+pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
+
+pub const SUNDANESE: &'static [(char, char)] =
+    &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')];
+
+pub const SYRIAC: &'static [(char, char)] =
+    &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')];
+
+pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}')];
+
+pub const TAGBANWA: &'static [(char, char)] =
+    &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
+
+pub const TAI_LE: &'static [(char, char)] = &[('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
+
+pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚸'), ('𑛀', '𑛉')];
+
+pub const TAMIL: &'static [(char, char)] = &[
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௦', '௺'),
+    ('𑿀', '𑿱'),
+    ('𑿿', '𑿿'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+    ('𖿠', '𖿠'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18aff}'),
+    ('\u{18d00}', '\u{18d08}'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+    ('\u{c00}', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('౦', '౯'),
+    ('౷', '౿'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[('ހ', 'ޱ')];
+
+pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+    ('ༀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('྾', '࿌'),
+    ('࿎', '࿔'),
+    ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] =
+    &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')];
+
+pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')];
+
+pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
+
+pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
+
+pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
+
+pub const YEZIDI: &'static [(char, char)] = &[
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10ead}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+];
+
+pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
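
Editor's note (illustrative only, not part of this patch): each script constant above is a sorted list of inclusive (start, end) char ranges, so membership of a character in a script can be decided with a binary search over the ranges. The sketch below assumes that layout; `GREEK_SAMPLE` is a hypothetical excerpt and `contains` is not a crate API.

    // Minimal sketch, assuming sorted, non-overlapping, inclusive ranges
    // like the generated tables above. Not part of this patch.

    /// Hypothetical excerpt in the same shape as the generated script tables.
    const GREEK_SAMPLE: &[(char, char)] = &[('Ͱ', 'ͳ'), ('Α', 'Ρ'), ('Σ', 'ω')];

    /// True if `c` falls inside one of the sorted, inclusive ranges.
    fn contains(ranges: &[(char, char)], c: char) -> bool {
        ranges
            .binary_search_by(|&(start, end)| {
                if c < start {
                    std::cmp::Ordering::Greater
                } else if c > end {
                    std::cmp::Ordering::Less
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .is_ok()
    }

    fn main() {
        assert!(contains(GREEK_SAMPLE, 'λ'));
        assert!(!contains(GREEK_SAMPLE, 'z'));
    }
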
diff --git a/src/unicode_tables/script_extension.rs b/src/unicode_tables/script_extension.rs
new file mode 100644
index 0000000..1b6b4ff
--- /dev/null
+++ b/src/unicode_tables/script_extension.rs
@@ -0,0 +1,1394 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script-extension ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("Adlam", ADLAM),
+    ("Ahom", AHOM),
+    ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
+    ("Arabic", ARABIC),
+    ("Armenian", ARMENIAN),
+    ("Avestan", AVESTAN),
+    ("Balinese", BALINESE),
+    ("Bamum", BAMUM),
+    ("Bassa_Vah", BASSA_VAH),
+    ("Batak", BATAK),
+    ("Bengali", BENGALI),
+    ("Bhaiksuki", BHAIKSUKI),
+    ("Bopomofo", BOPOMOFO),
+    ("Brahmi", BRAHMI),
+    ("Braille", BRAILLE),
+    ("Buginese", BUGINESE),
+    ("Buhid", BUHID),
+    ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+    ("Carian", CARIAN),
+    ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+    ("Chakma", CHAKMA),
+    ("Cham", CHAM),
+    ("Cherokee", CHEROKEE),
+    ("Chorasmian", CHORASMIAN),
+    ("Common", COMMON),
+    ("Coptic", COPTIC),
+    ("Cuneiform", CUNEIFORM),
+    ("Cypriot", CYPRIOT),
+    ("Cyrillic", CYRILLIC),
+    ("Deseret", DESERET),
+    ("Devanagari", DEVANAGARI),
+    ("Dives_Akuru", DIVES_AKURU),
+    ("Dogra", DOGRA),
+    ("Duployan", DUPLOYAN),
+    ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
+    ("Elbasan", ELBASAN),
+    ("Elymaic", ELYMAIC),
+    ("Ethiopic", ETHIOPIC),
+    ("Georgian", GEORGIAN),
+    ("Glagolitic", GLAGOLITIC),
+    ("Gothic", GOTHIC),
+    ("Grantha", GRANTHA),
+    ("Greek", GREEK),
+    ("Gujarati", GUJARATI),
+    ("Gunjala_Gondi", GUNJALA_GONDI),
+    ("Gurmukhi", GURMUKHI),
+    ("Han", HAN),
+    ("Hangul", HANGUL),
+    ("Hanifi_Rohingya", HANIFI_ROHINGYA),
+    ("Hanunoo", HANUNOO),
+    ("Hatran", HATRAN),
+    ("Hebrew", HEBREW),
+    ("Hiragana", HIRAGANA),
+    ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
+    ("Inherited", INHERITED),
+    ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+    ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
+    ("Javanese", JAVANESE),
+    ("Kaithi", KAITHI),
+    ("Kannada", KANNADA),
+    ("Katakana", KATAKANA),
+    ("Kayah_Li", KAYAH_LI),
+    ("Kharoshthi", KHAROSHTHI),
+    ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
+    ("Khmer", KHMER),
+    ("Khojki", KHOJKI),
+    ("Khudawadi", KHUDAWADI),
+    ("Lao", LAO),
+    ("Latin", LATIN),
+    ("Lepcha", LEPCHA),
+    ("Limbu", LIMBU),
+    ("Linear_A", LINEAR_A),
+    ("Linear_B", LINEAR_B),
+    ("Lisu", LISU),
+    ("Lycian", LYCIAN),
+    ("Lydian", LYDIAN),
+    ("Mahajani", MAHAJANI),
+    ("Makasar", MAKASAR),
+    ("Malayalam", MALAYALAM),
+    ("Mandaic", MANDAIC),
+    ("Manichaean", MANICHAEAN),
+    ("Marchen", MARCHEN),
+    ("Masaram_Gondi", MASARAM_GONDI),
+    ("Medefaidrin", MEDEFAIDRIN),
+    ("Meetei_Mayek", MEETEI_MAYEK),
+    ("Mende_Kikakui", MENDE_KIKAKUI),
+    ("Meroitic_Cursive", MEROITIC_CURSIVE),
+    ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
+    ("Miao", MIAO),
+    ("Modi", MODI),
+    ("Mongolian", MONGOLIAN),
+    ("Mro", MRO),
+    ("Multani", MULTANI),
+    ("Myanmar", MYANMAR),
+    ("Nabataean", NABATAEAN),
+    ("Nandinagari", NANDINAGARI),
+    ("New_Tai_Lue", NEW_TAI_LUE),
+    ("Newa", NEWA),
+    ("Nko", NKO),
+    ("Nushu", NUSHU),
+    ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
+    ("Ogham", OGHAM),
+    ("Ol_Chiki", OL_CHIKI),
+    ("Old_Hungarian", OLD_HUNGARIAN),
+    ("Old_Italic", OLD_ITALIC),
+    ("Old_North_Arabian", OLD_NORTH_ARABIAN),
+    ("Old_Permic", OLD_PERMIC),
+    ("Old_Persian", OLD_PERSIAN),
+    ("Old_Sogdian", OLD_SOGDIAN),
+    ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+    ("Old_Turkic", OLD_TURKIC),
+    ("Oriya", ORIYA),
+    ("Osage", OSAGE),
+    ("Osmanya", OSMANYA),
+    ("Pahawh_Hmong", PAHAWH_HMONG),
+    ("Palmyrene", PALMYRENE),
+    ("Pau_Cin_Hau", PAU_CIN_HAU),
+    ("Phags_Pa", PHAGS_PA),
+    ("Phoenician", PHOENICIAN),
+    ("Psalter_Pahlavi", PSALTER_PAHLAVI),
+    ("Rejang", REJANG),
+    ("Runic", RUNIC),
+    ("Samaritan", SAMARITAN),
+    ("Saurashtra", SAURASHTRA),
+    ("Sharada", SHARADA),
+    ("Shavian", SHAVIAN),
+    ("Siddham", SIDDHAM),
+    ("SignWriting", SIGNWRITING),
+    ("Sinhala", SINHALA),
+    ("Sogdian", SOGDIAN),
+    ("Sora_Sompeng", SORA_SOMPENG),
+    ("Soyombo", SOYOMBO),
+    ("Sundanese", SUNDANESE),
+    ("Syloti_Nagri", SYLOTI_NAGRI),
+    ("Syriac", SYRIAC),
+    ("Tagalog", TAGALOG),
+    ("Tagbanwa", TAGBANWA),
+    ("Tai_Le", TAI_LE),
+    ("Tai_Tham", TAI_THAM),
+    ("Tai_Viet", TAI_VIET),
+    ("Takri", TAKRI),
+    ("Tamil", TAMIL),
+    ("Tangut", TANGUT),
+    ("Telugu", TELUGU),
+    ("Thaana", THAANA),
+    ("Thai", THAI),
+    ("Tibetan", TIBETAN),
+    ("Tifinagh", TIFINAGH),
+    ("Tirhuta", TIRHUTA),
+    ("Ugaritic", UGARITIC),
+    ("Vai", VAI),
+    ("Wancho", WANCHO),
+    ("Warang_Citi", WARANG_CITI),
+    ("Yezidi", YEZIDI),
+    ("Yi", YI),
+    ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] =
+    &[('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
+
+pub const AHOM: &'static [(char, char)] =
+    &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
+
+pub const ARABIC: &'static [(char, char)] = &[
+    ('\u{600}', '\u{604}'),
+    ('؆', '\u{61c}'),
+    ('؞', '\u{6dc}'),
+    ('۞', 'ۿ'),
+    ('ݐ', 'ݿ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', '\u{8ff}'),
+    ('ﭐ', '﯁'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', '﷽'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('\u{102e0}', '𐋻'),
+    ('𐹠', '𐹾'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] =
+    &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
+
+pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')];
+
+pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
+
+pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
+
+pub const BASSA_VAH: &'static [(char, char)] =
+    &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')];
+
+pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')];
+
+pub const BENGALI: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('ঀ', 'ঃ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('\u{9bc}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', 'ৎ'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('ড়', 'ঢ়'),
+    ('য়', '\u{9e3}'),
+    ('০', '\u{9fe}'),
+    ('\u{1cd0}', '\u{1cd0}'),
+    ('\u{1cd2}', '\u{1cd2}'),
+    ('\u{1cd5}', '\u{1cd6}'),
+    ('\u{1cd8}', '\u{1cd8}'),
+    ('᳡', '᳡'),
+    ('ᳪ', 'ᳪ'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('ᳲ', 'ᳲ'),
+    ('ᳵ', '᳷'),
+    ('\u{a8f1}', '\u{a8f1}'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] =
+    &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+    ('˪', '˫'),
+    ('、', '〃'),
+    ('〈', '】'),
+    ('〓', '〟'),
+    ('\u{302a}', '\u{302d}'),
+    ('〰', '〰'),
+    ('〷', '〷'),
+    ('・', '・'),
+    ('ㄅ', 'ㄯ'),
+    ('ㆠ', '\u{31bf}'),
+    ('﹅', '﹆'),
+    ('。', '・'),
+];
+
+pub const BRAHMI: &'static [(char, char)] =
+    &[('𑀀', '𑁍'), ('𑁒', '𑁯'), ('\u{1107f}', '\u{1107f}')];
+
+pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')];
+
+pub const BUGINESE: &'static [(char, char)] =
+    &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟'), ('ꧏ', 'ꧏ')];
+
+pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}')];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
+    &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ')];
+
+pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
+    &[('𐔰', '𐕣'), ('𐕯', '𐕯')];
+
+pub const CHAKMA: &'static [(char, char)] =
+    &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')];
+
+pub const CHAM: &'static [(char, char)] =
+    &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
+
+pub const CHEROKEE: &'static [(char, char)] =
+    &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')];
+
+pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
+
+pub const COMMON: &'static [(char, char)] = &[
+    ('\u{0}', '@'),
+    ('[', '`'),
+    ('{', '©'),
+    ('«', '¹'),
+    ('»', '¿'),
+    ('×', '×'),
+    ('÷', '÷'),
+    ('ʹ', '˟'),
+    ('˥', '˩'),
+    ('ˬ', '˿'),
+    ('ʹ', 'ʹ'),
+    (';', ';'),
+    ('΅', '΅'),
+    ('·', '·'),
+    ('\u{605}', '\u{605}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('฿', '฿'),
+    ('࿕', '࿘'),
+    ('᛫', '᛭'),
+    ('\u{2000}', '\u{200b}'),
+    ('\u{200e}', '\u{202e}'),
+    ('‰', '\u{2064}'),
+    ('\u{2066}', '⁰'),
+    ('⁴', '⁾'),
+    ('₀', '₎'),
+    ('₠', '₿'),
+    ('℀', '℥'),
+    ('℧', '℩'),
+    ('ℬ', 'ℱ'),
+    ('ℳ', '⅍'),
+    ('⅏', '⅟'),
+    ('↉', '↋'),
+    ('←', '␦'),
+    ('⑀', '⑊'),
+    ('①', '⟿'),
+    ('⤀', '⭳'),
+    ('⭶', '⮕'),
+    ('\u{2b97}', '⯿'),
+    ('⸀', '⹂'),
+    ('⹄', '\u{2e52}'),
+    ('⿰', '⿻'),
+    ('\u{3000}', '\u{3000}'),
+    ('〄', '〄'),
+    ('〒', '〒'),
+    ('〠', '〠'),
+    ('〶', '〶'),
+    ('㉈', '㉟'),
+    ('㉿', '㉿'),
+    ('㊱', '㊿'),
+    ('㋌', '㋏'),
+    ('㍱', '㍺'),
+    ('㎀', '㏟'),
+    ('㏿', '㏿'),
+    ('䷀', '䷿'),
+    ('꜈', '꜡'),
+    ('ꞈ', '꞊'),
+    ('꭛', '꭛'),
+    ('\u{ab6a}', '\u{ab6b}'),
+    ('﴾', '﴿'),
+    ('︐', '︙'),
+    ('︰', '﹄'),
+    ('﹇', '﹒'),
+    ('﹔', '﹦'),
+    ('﹨', '﹫'),
+    ('\u{feff}', '\u{feff}'),
+    ('!', '@'),
+    ('[', '`'),
+    ('{', '⦆'),
+    ('¢', '₩'),
+    ('│', '○'),
+    ('\u{fff9}', '�'),
+    ('𐆐', '\u{1019c}'),
+    ('𐇐', '𐇼'),
+    ('𖿢', '𖿣'),
+    ('𝀀', '𝃵'),
+    ('𝄀', '𝄦'),
+    ('𝄩', '𝅦'),
+    ('𝅪', '\u{1d17a}'),
+    ('𝆃', '𝆄'),
+    ('𝆌', '𝆩'),
+    ('𝆮', '𝇨'),
+    ('𝋠', '𝋳'),
+    ('𝌀', '𝍖'),
+    ('𝍲', '𝍸'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝟋'),
+    ('𝟎', '𝟿'),
+    ('𞱱', '𞲴'),
+    ('𞴁', '𞴽'),
+    ('🀀', '🀫'),
+    ('🀰', '🂓'),
+    ('🂠', '🂮'),
+    ('🂱', '🂿'),
+    ('🃁', '🃏'),
+    ('🃑', '🃵'),
+    ('🄀', '\u{1f1ad}'),
+    ('🇦', '🇿'),
+    ('🈁', '🈂'),
+    ('🈐', '🈻'),
+    ('🉀', '🉈'),
+    ('🉠', '🉥'),
+    ('🌀', '\u{1f6d7}'),
+    ('🛠', '🛬'),
+    ('🛰', '\u{1f6fc}'),
+    ('🜀', '🝳'),
+    ('🞀', '🟘'),
+    ('🟠', '🟫'),
+    ('🠀', '🠋'),
+    ('🠐', '🡇'),
+    ('🡐', '🡙'),
+    ('🡠', '🢇'),
+    ('🢐', '🢭'),
+    ('\u{1f8b0}', '\u{1f8b1}'),
+    ('🤀', '\u{1f978}'),
+    ('🥺', '\u{1f9cb}'),
+    ('🧍', '🩓'),
+    ('🩠', '🩭'),
+    ('🩰', '\u{1fa74}'),
+    ('🩸', '🩺'),
+    ('🪀', '\u{1fa86}'),
+    ('🪐', '\u{1faa8}'),
+    ('\u{1fab0}', '\u{1fab6}'),
+    ('\u{1fac0}', '\u{1fac2}'),
+    ('\u{1fad0}', '\u{1fad6}'),
+    ('\u{1fb00}', '\u{1fb92}'),
+    ('\u{1fb94}', '\u{1fbca}'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] =
+    &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'), ('\u{102e0}', '𐋻')];
+
+pub const CUNEIFORM: &'static [(char, char)] =
+    &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+    ('𐄀', '𐄂'),
+    ('𐄇', '𐄳'),
+    ('𐄷', '𐄿'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐠿'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+    ('Ѐ', 'ԯ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᴫ', 'ᴫ'),
+    ('ᵸ', 'ᵸ'),
+    ('\u{1df8}', '\u{1df8}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('⹃', '⹃'),
+    ('Ꙁ', '\u{a69f}'),
+    ('\u{fe2e}', '\u{fe2f}'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+    ('\u{900}', '\u{952}'),
+    ('\u{955}', 'ॿ'),
+    ('\u{1cd0}', 'ᳶ'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{20f0}', '\u{20f0}'),
+    ('꠰', '꠹'),
+    ('\u{a8e0}', '\u{a8ff}'),
+];
+
+pub const DIVES_AKURU: &'static [(char, char)] = &[
+    ('\u{11900}', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{11946}'),
+    ('\u{11950}', '\u{11959}'),
+];
+
+pub const DOGRA: &'static [(char, char)] =
+    &[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')];
+
+pub const DUPLOYAN: &'static [(char, char)] =
+    &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
+    &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
+
+pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
+
+pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+    ('ሀ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('\u{135d}', '፼'),
+    ('ᎀ', '᎙'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჿ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+    ('\u{484}', '\u{484}'),
+    ('\u{487}', '\u{487}'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('⹃', '⹃'),
+    ('\u{a66f}', '\u{a66f}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('௦', '௳'),
+    ('\u{1cd0}', '\u{1cd0}'),
+    ('\u{1cd2}', '᳓'),
+    ('ᳲ', '\u{1cf4}'),
+    ('\u{1cf8}', '\u{1cf9}'),
+    ('\u{20f0}', '\u{20f0}'),
+    ('\u{11300}', '𑌃'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('\u{1133b}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('𑍐', '𑍐'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍝', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑿐', '𑿑'),
+    ('𑿓', '𑿓'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+    ('\u{342}', '\u{342}'),
+    ('\u{345}', '\u{345}'),
+    ('Ͱ', 'ͳ'),
+    ('͵', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('΄', '΄'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϡ'),
+    ('ϰ', 'Ͽ'),
+    ('ᴦ', 'ᴪ'),
+    ('ᵝ', 'ᵡ'),
+    ('ᵦ', 'ᵪ'),
+    ('ᶿ', '\u{1dc1}'),
+    ('ἀ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ῄ'),
+    ('ῆ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('῝', '`'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', '῾'),
+    ('Ω', 'Ω'),
+    ('ꭥ', 'ꭥ'),
+    ('𐅀', '𐆎'),
+    ('𐆠', '𐆠'),
+    ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{a81}', 'ઃ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('\u{abc}', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', '\u{ae3}'),
+    ('૦', '૱'),
+    ('ૹ', '\u{aff}'),
+    ('꠰', '꠹'),
+];
+
+pub const GUNJALA_GONDI: &'static [(char, char)] = &[
+    ('।', '॥'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '𑶘'),
+    ('𑶠', '𑶩'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{a01}', 'ਃ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('੦', '੶'),
+    ('꠰', '꠹'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+    ('⺀', '⺙'),
+    ('⺛', '⻳'),
+    ('⼀', '⿕'),
+    ('、', '〃'),
+    ('々', '】'),
+    ('〓', '〟'),
+    ('〡', '\u{302d}'),
+    ('〰', '〰'),
+    ('〷', '〿'),
+    ('・', '・'),
+    ('㆐', '㆟'),
+    ('㇀', '㇣'),
+    ('㈠', '㉇'),
+    ('㊀', '㊰'),
+    ('㋀', '㋋'),
+    ('㋿', '㋿'),
+    ('㍘', '㍰'),
+    ('㍻', '㍿'),
+    ('㏠', '㏾'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('꜀', '꜇'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('﹅', '﹆'),
+    ('。', '・'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('𝍠', '𝍱'),
+    ('🉐', '🉑'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+    ('ᄀ', 'ᇿ'),
+    ('、', '〃'),
+    ('〈', '】'),
+    ('〓', '〟'),
+    ('\u{302e}', '〰'),
+    ('〷', '〷'),
+    ('・', '・'),
+    ('ㄱ', 'ㆎ'),
+    ('㈀', '㈞'),
+    ('㉠', '㉾'),
+    ('ꥠ', 'ꥼ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('﹅', '﹆'),
+    ('。', '・'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+];
+
+pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[
+    ('،', '،'),
+    ('؛', '؛'),
+    ('؟', '؟'),
+    ('ـ', 'ـ'),
+    ('۔', '۔'),
+    ('𐴀', '\u{10d27}'),
+    ('𐴰', '𐴹'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜶')];
+
+pub const HATRAN: &'static [(char, char)] =
+    &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')];
+
+pub const HEBREW: &'static [(char, char)] = &[
+    ('\u{591}', '\u{5c7}'),
+    ('א', 'ת'),
+    ('ׯ', '״'),
+    ('יִ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﭏ'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+    ('、', '〃'),
+    ('〈', '】'),
+    ('〓', '〟'),
+    ('〰', '〵'),
+    ('〷', '〷'),
+    ('〼', '〽'),
+    ('ぁ', 'ゖ'),
+    ('\u{3099}', '゠'),
+    ('・', 'ー'),
+    ('﹅', '﹆'),
+    ('。', '・'),
+    ('ー', 'ー'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('𛀁', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
+    &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
+
+pub const INHERITED: &'static [(char, char)] = &[
+    ('\u{300}', '\u{341}'),
+    ('\u{343}', '\u{344}'),
+    ('\u{346}', '\u{362}'),
+    ('\u{953}', '\u{954}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1dc2}', '\u{1df7}'),
+    ('\u{1df9}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200d}'),
+    ('\u{20d0}', '\u{20ef}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2d}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{1d167}', '\u{1d169}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
+    &[('𐭠', '𐭲'), ('𐭸', '𐭿')];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
+    &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
+
+pub const JAVANESE: &'static [(char, char)] =
+    &[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')];
+
+pub const KAITHI: &'static [(char, char)] =
+    &[('०', '९'), ('꠰', '꠹'), ('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')];
+
+pub const KANNADA: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('ಀ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('\u{cbc}', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', '\u{ce3}'),
+    ('೦', '೯'),
+    ('ೱ', 'ೲ'),
+    ('\u{1cd0}', '\u{1cd0}'),
+    ('\u{1cd2}', '\u{1cd2}'),
+    ('\u{1cda}', '\u{1cda}'),
+    ('ᳲ', 'ᳲ'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('꠰', '꠵'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+    ('、', '〃'),
+    ('〈', '】'),
+    ('〓', '〟'),
+    ('〰', '〵'),
+    ('〷', '〷'),
+    ('〼', '〽'),
+    ('\u{3099}', '゜'),
+    ('゠', 'ヿ'),
+    ('ㇰ', 'ㇿ'),
+    ('㋐', '㋾'),
+    ('㌀', '㍗'),
+    ('﹅', '﹆'),
+    ('。', '\u{ff9f}'),
+    ('𛀀', '𛀀'),
+    ('𛅤', '𛅧'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+    ('𐨀', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '𐩈'),
+    ('𐩐', '𐩘'),
+];
+
+pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
+    &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
+
+pub const KHMER: &'static [(char, char)] =
+    &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
+
+pub const KHOJKI: &'static [(char, char)] =
+    &[('૦', '૯'), ('꠰', '꠹'), ('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
+
+pub const KHUDAWADI: &'static [(char, char)] =
+    &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
+
+pub const LAO: &'static [(char, char)] = &[
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('໐', '໙'),
+    ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', 'ʸ'),
+    ('ˠ', 'ˤ'),
+    ('\u{363}', '\u{36f}'),
+    ('\u{485}', '\u{486}'),
+    ('\u{951}', '\u{952}'),
+    ('჻', '჻'),
+    ('ᴀ', 'ᴥ'),
+    ('ᴬ', 'ᵜ'),
+    ('ᵢ', 'ᵥ'),
+    ('ᵫ', 'ᵷ'),
+    ('ᵹ', 'ᶾ'),
+    ('Ḁ', 'ỿ'),
+    ('\u{202f}', '\u{202f}'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('\u{20f0}', '\u{20f0}'),
+    ('K', 'Å'),
+    ('Ⅎ', 'Ⅎ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⱡ', 'Ɀ'),
+    ('꜀', '꜇'),
+    ('Ꜣ', 'ꞇ'),
+    ('Ꞌ', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꟿ'),
+    ('꤮', '꤮'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', 'ꭤ'),
+    ('ꭦ', '\u{ab69}'),
+    ('ff', 'st'),
+    ('A', 'Z'),
+    ('a', 'z'),
+];
+
+pub const LEPCHA: &'static [(char, char)] =
+    &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')];
+
+pub const LIMBU: &'static [(char, char)] = &[
+    ('॥', '॥'),
+    ('ᤀ', 'ᤞ'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('᥀', '᥀'),
+    ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] =
+    &[('𐄇', '𐄳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐄀', '𐄂'),
+    ('𐄇', '𐄳'),
+    ('𐄷', '𐄿'),
+];
+
+pub const LISU: &'static [(char, char)] =
+    &[('ꓐ', '꓿'), ('\u{11fb0}', '\u{11fb0}')];
+
+pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
+
+pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')];
+
+pub const MAHAJANI: &'static [(char, char)] =
+    &[('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')];
+
+pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{d00}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '൏'),
+    ('ൔ', '\u{d63}'),
+    ('൦', 'ൿ'),
+    ('\u{1cda}', '\u{1cda}'),
+    ('꠰', '꠲'),
+];
+
+pub const MANDAIC: &'static [(char, char)] =
+    &[('ـ', 'ـ'), ('ࡀ', '\u{85b}'), ('࡞', '࡞')];
+
+pub const MANICHAEAN: &'static [(char, char)] =
+    &[('ـ', 'ـ'), ('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')];
+
+pub const MARCHEN: &'static [(char, char)] =
+    &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+    ('।', '॥'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d47}'),
+    ('𑵐', '𑵙'),
+];
+
+pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] =
+    &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] =
+    &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] =
+    &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')];
+
+pub const MIAO: &'static [(char, char)] =
+    &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
+
+pub const MODI: &'static [(char, char)] =
+    &[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+    ('᠀', '\u{180e}'),
+    ('᠐', '᠙'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢪ'),
+    ('\u{202f}', '\u{202f}'),
+    ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
+
+pub const MULTANI: &'static [(char, char)] =
+    &[('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
+
+pub const MYANMAR: &'static [(char, char)] =
+    &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')];
+
+pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
+
+pub const NANDINAGARI: &'static [(char, char)] = &[
+    ('।', '॥'),
+    ('೦', '೯'),
+    ('ᳩ', 'ᳩ'),
+    ('ᳲ', 'ᳲ'),
+    ('ᳺ', 'ᳺ'),
+    ('꠰', '꠵'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '\u{119d7}'),
+    ('\u{119da}', '𑧤'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] =
+    &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
+
+pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
+
+pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')];
+
+pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
+
+pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
+    &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
+
+pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] =
+    &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
+
+pub const OLD_PERMIC: &'static [(char, char)] =
+    &[('\u{483}', '\u{483}'), ('𐍐', '\u{1037a}')];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
+
+pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')];
+
+pub const ORIYA: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{b01}', 'ଃ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('\u{b3c}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', '\u{b63}'),
+    ('୦', '୷'),
+    ('\u{1cda}', '\u{1cda}'),
+    ('ᳲ', 'ᳲ'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')];
+
+pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] =
+    &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
+
+pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
+
+pub const PHAGS_PA: &'static [(char, char)] =
+    &[('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷')];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] =
+    &[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
+
+pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')];
+
+pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')];
+
+pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
+
+pub const SAURASHTRA: &'static [(char, char)] =
+    &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
+
+pub const SHARADA: &'static [(char, char)] = &[
+    ('\u{951}', '\u{951}'),
+    ('\u{1cd7}', '\u{1cd7}'),
+    ('\u{1cd9}', '\u{1cd9}'),
+    ('\u{1cdc}', '\u{1cdd}'),
+    ('\u{1ce0}', '\u{1ce0}'),
+    ('\u{11180}', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
+
+pub const SIDDHAM: &'static [(char, char)] =
+    &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
+
+pub const SIGNWRITING: &'static [(char, char)] =
+    &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
+
+pub const SINHALA: &'static [(char, char)] = &[
+    ('।', '॥'),
+    ('\u{d81}', 'ඃ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('෦', '෯'),
+    ('ෲ', '෴'),
+    ('𑇡', '𑇴'),
+];
+
+pub const SOGDIAN: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐼰', '𐽙')];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
+
+pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
+
+pub const SUNDANESE: &'static [(char, char)] =
+    &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] =
+    &[('।', '॥'), ('০', '৯'), ('ꠀ', '\u{a82c}')];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+    ('،', '،'),
+    ('؛', '\u{61c}'),
+    ('؟', '؟'),
+    ('ـ', 'ـ'),
+    ('\u{64b}', '\u{655}'),
+    ('\u{670}', '\u{670}'),
+    ('܀', '܍'),
+    ('\u{70f}', '\u{74a}'),
+    ('ݍ', 'ݏ'),
+    ('ࡠ', 'ࡪ'),
+    ('\u{1df8}', '\u{1df8}'),
+];
+
+pub const TAGALOG: &'static [(char, char)] =
+    &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}'), ('᜵', '᜶')];
+
+pub const TAGBANWA: &'static [(char, char)] =
+    &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
+
+pub const TAI_LE: &'static [(char, char)] =
+    &[('၀', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+    ('ᨠ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '᪉'),
+    ('᪐', '᪙'),
+    ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
+
+pub const TAKRI: &'static [(char, char)] =
+    &[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚸'), ('𑛀', '𑛉')];
+
+pub const TAMIL: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{b82}', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('ௐ', 'ௐ'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('௦', '௺'),
+    ('\u{1cda}', '\u{1cda}'),
+    ('ꣳ', 'ꣳ'),
+    ('\u{11301}', '\u{11301}'),
+    ('𑌃', '𑌃'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('𑿀', '𑿱'),
+    ('𑿿', '𑿿'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+    ('𖿠', '𖿠'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18aff}'),
+    ('\u{18d00}', '\u{18d08}'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('\u{c00}', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', '\u{c63}'),
+    ('౦', '౯'),
+    ('౷', '౿'),
+    ('\u{1cda}', '\u{1cda}'),
+    ('ᳲ', 'ᳲ'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+    ('،', '،'),
+    ('؛', '\u{61c}'),
+    ('؟', '؟'),
+    ('٠', '٩'),
+    ('ހ', 'ޱ'),
+    ('ﷲ', 'ﷲ'),
+    ('﷽', '﷽'),
+];
+
+pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+    ('ༀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('\u{f71}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('྾', '࿌'),
+    ('࿎', '࿔'),
+    ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] =
+    &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+    ('\u{951}', '\u{952}'),
+    ('।', '॥'),
+    ('ᳲ', 'ᳲ'),
+    ('꠰', '꠹'),
+    ('𑒀', '𑓇'),
+    ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
+
+pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
+
+pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
+
+pub const YEZIDI: &'static [(char, char)] = &[
+    ('،', '،'),
+    ('؛', '؛'),
+    ('؟', '؟'),
+    ('٠', '٩'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eab}', '\u{10ead}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+    ('、', '。'),
+    ('〈', '】'),
+    ('〔', '〛'),
+    ('・', '・'),
+    ('ꀀ', 'ꒌ'),
+    ('꒐', '꓆'),
+    ('。', '・'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
diff --git a/src/unicode_tables/sentence_break.rs b/src/unicode_tables/sentence_break.rs
new file mode 100644
index 0000000..e4840f4
--- /dev/null
+++ b/src/unicode_tables/sentence_break.rs
@@ -0,0 +1,2394 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate sentence-break ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("ATerm", ATERM),
+    ("CR", CR),
+    ("Close", CLOSE),
+    ("Extend", EXTEND),
+    ("Format", FORMAT),
+    ("LF", LF),
+    ("Lower", LOWER),
+    ("Numeric", NUMERIC),
+    ("OLetter", OLETTER),
+    ("SContinue", SCONTINUE),
+    ("STerm", STERM),
+    ("Sep", SEP),
+    ("Sp", SP),
+    ("Upper", UPPER),
+];
+
+pub const ATERM: &'static [(char, char)] =
+    &[('.', '.'), ('․', '․'), ('﹒', '﹒'), ('.', '.')];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const CLOSE: &'static [(char, char)] = &[
+    ('\"', '\"'),
+    ('\'', ')'),
+    ('[', '['),
+    (']', ']'),
+    ('{', '{'),
+    ('}', '}'),
+    ('«', '«'),
+    ('»', '»'),
+    ('༺', '༽'),
+    ('᚛', '᚜'),
+    ('‘', '‟'),
+    ('‹', '›'),
+    ('⁅', '⁆'),
+    ('⁽', '⁾'),
+    ('₍', '₎'),
+    ('⌈', '⌋'),
+    ('⟨', '⟩'),
+    ('❛', '❠'),
+    ('❨', '❵'),
+    ('⟅', '⟆'),
+    ('⟦', '⟯'),
+    ('⦃', '⦘'),
+    ('⧘', '⧛'),
+    ('⧼', '⧽'),
+    ('⸀', '⸍'),
+    ('⸜', '⸝'),
+    ('⸠', '⸩'),
+    ('⹂', '⹂'),
+    ('〈', '】'),
+    ('〔', '〛'),
+    ('〝', '〟'),
+    ('﴾', '﴿'),
+    ('︗', '︘'),
+    ('︵', '﹄'),
+    ('﹇', '﹈'),
+    ('﹙', '﹞'),
+    ('(', ')'),
+    ('[', '['),
+    (']', ']'),
+    ('{', '{'),
+    ('}', '}'),
+    ('⦅', '⦆'),
+    ('「', '」'),
+    ('🙶', '🙸'),
+];
+
+pub const EXTEND: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{489}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', 'ः'),
+    ('\u{93a}', '\u{93c}'),
+    ('ा', 'ॏ'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', 'ঃ'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('\u{abc}', '\u{abc}'),
+    ('ા', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3e}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', '\u{c04}'),
+    ('\u{c3e}', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', 'ಃ'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('ಾ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', 'ഃ'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d3e}', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', 'ඃ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', '༿'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('ါ', '\u{103e}'),
+    ('ၖ', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('ၢ', 'ၤ'),
+    ('ၧ', 'ၭ'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{108d}'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('\u{1a17}', '\u{1a1b}'),
+    ('ᩕ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᬄ'),
+    ('\u{1b34}', '᭄'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', 'ᮂ'),
+    ('ᮡ', '\u{1bad}'),
+    ('\u{1be6}', '᯳'),
+    ('ᰤ', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('᳷', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200d}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('ꠣ', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '꥓'),
+    ('\u{a980}', 'ꦃ'),
+    ('\u{a9b3}', '꧀'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', 'ꩍ'),
+    ('ꩻ', 'ꩽ'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('ꫫ', 'ꫯ'),
+    ('ꫵ', '\u{aaf6}'),
+    ('ꯣ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('𑀀', '𑀂'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '𑂂'),
+    ('𑂰', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{11134}'),
+    ('𑅅', '𑅆'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '𑆂'),
+    ('𑆳', '𑇀'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('𑈬', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112ea}'),
+    ('\u{11300}', '𑌃'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{1133e}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍢', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐵', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b0}', '\u{114c3}'),
+    ('\u{115af}', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('𑘰', '\u{11640}'),
+    ('\u{116ab}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑠬', '\u{1183a}'),
+    ('\u{11930}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{1193e}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11943}'),
+    ('𑧑', '\u{119d7}'),
+    ('\u{119da}', '\u{119e0}'),
+    ('𑧤', '𑧤'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '𑨹'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a99}'),
+    ('𑰯', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('𑶊', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '\u{11d97}'),
+    ('\u{11ef3}', '𑻶'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('𖽑', '𖾇'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('\u{e0020}', '\u{e007f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+    ('\u{ad}', '\u{ad}'),
+    ('\u{600}', '\u{605}'),
+    ('\u{61c}', '\u{61c}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{180e}', '\u{180e}'),
+    ('\u{200b}', '\u{200b}'),
+    ('\u{200e}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{2064}'),
+    ('\u{2066}', '\u{206f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{fff9}', '\u{fffb}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('\u{13430}', '\u{13438}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const LOWER: &'static [(char, char)] = &[
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('ß', 'ö'),
+    ('ø', 'ÿ'),
+    ('ā', 'ā'),
+    ('ă', 'ă'),
+    ('ą', 'ą'),
+    ('ć', 'ć'),
+    ('ĉ', 'ĉ'),
+    ('ċ', 'ċ'),
+    ('č', 'č'),
+    ('ď', 'ď'),
+    ('đ', 'đ'),
+    ('ē', 'ē'),
+    ('ĕ', 'ĕ'),
+    ('ė', 'ė'),
+    ('ę', 'ę'),
+    ('ě', 'ě'),
+    ('ĝ', 'ĝ'),
+    ('ğ', 'ğ'),
+    ('ġ', 'ġ'),
+    ('ģ', 'ģ'),
+    ('ĥ', 'ĥ'),
+    ('ħ', 'ħ'),
+    ('ĩ', 'ĩ'),
+    ('ī', 'ī'),
+    ('ĭ', 'ĭ'),
+    ('į', 'į'),
+    ('ı', 'ı'),
+    ('ij', 'ij'),
+    ('ĵ', 'ĵ'),
+    ('ķ', 'ĸ'),
+    ('ĺ', 'ĺ'),
+    ('ļ', 'ļ'),
+    ('ľ', 'ľ'),
+    ('ŀ', 'ŀ'),
+    ('ł', 'ł'),
+    ('ń', 'ń'),
+    ('ņ', 'ņ'),
+    ('ň', 'ʼn'),
+    ('ŋ', 'ŋ'),
+    ('ō', 'ō'),
+    ('ŏ', 'ŏ'),
+    ('ő', 'ő'),
+    ('œ', 'œ'),
+    ('ŕ', 'ŕ'),
+    ('ŗ', 'ŗ'),
+    ('ř', 'ř'),
+    ('ś', 'ś'),
+    ('ŝ', 'ŝ'),
+    ('ş', 'ş'),
+    ('š', 'š'),
+    ('ţ', 'ţ'),
+    ('ť', 'ť'),
+    ('ŧ', 'ŧ'),
+    ('ũ', 'ũ'),
+    ('ū', 'ū'),
+    ('ŭ', 'ŭ'),
+    ('ů', 'ů'),
+    ('ű', 'ű'),
+    ('ų', 'ų'),
+    ('ŵ', 'ŵ'),
+    ('ŷ', 'ŷ'),
+    ('ź', 'ź'),
+    ('ż', 'ż'),
+    ('ž', 'ƀ'),
+    ('ƃ', 'ƃ'),
+    ('ƅ', 'ƅ'),
+    ('ƈ', 'ƈ'),
+    ('ƌ', 'ƍ'),
+    ('ƒ', 'ƒ'),
+    ('ƕ', 'ƕ'),
+    ('ƙ', 'ƛ'),
+    ('ƞ', 'ƞ'),
+    ('ơ', 'ơ'),
+    ('ƣ', 'ƣ'),
+    ('ƥ', 'ƥ'),
+    ('ƨ', 'ƨ'),
+    ('ƪ', 'ƫ'),
+    ('ƭ', 'ƭ'),
+    ('ư', 'ư'),
+    ('ƴ', 'ƴ'),
+    ('ƶ', 'ƶ'),
+    ('ƹ', 'ƺ'),
+    ('ƽ', 'ƿ'),
+    ('dž', 'dž'),
+    ('lj', 'lj'),
+    ('nj', 'nj'),
+    ('ǎ', 'ǎ'),
+    ('ǐ', 'ǐ'),
+    ('ǒ', 'ǒ'),
+    ('ǔ', 'ǔ'),
+    ('ǖ', 'ǖ'),
+    ('ǘ', 'ǘ'),
+    ('ǚ', 'ǚ'),
+    ('ǜ', 'ǝ'),
+    ('ǟ', 'ǟ'),
+    ('ǡ', 'ǡ'),
+    ('ǣ', 'ǣ'),
+    ('ǥ', 'ǥ'),
+    ('ǧ', 'ǧ'),
+    ('ǩ', 'ǩ'),
+    ('ǫ', 'ǫ'),
+    ('ǭ', 'ǭ'),
+    ('ǯ', 'ǰ'),
+    ('dz', 'dz'),
+    ('ǵ', 'ǵ'),
+    ('ǹ', 'ǹ'),
+    ('ǻ', 'ǻ'),
+    ('ǽ', 'ǽ'),
+    ('ǿ', 'ǿ'),
+    ('ȁ', 'ȁ'),
+    ('ȃ', 'ȃ'),
+    ('ȅ', 'ȅ'),
+    ('ȇ', 'ȇ'),
+    ('ȉ', 'ȉ'),
+    ('ȋ', 'ȋ'),
+    ('ȍ', 'ȍ'),
+    ('ȏ', 'ȏ'),
+    ('ȑ', 'ȑ'),
+    ('ȓ', 'ȓ'),
+    ('ȕ', 'ȕ'),
+    ('ȗ', 'ȗ'),
+    ('ș', 'ș'),
+    ('ț', 'ț'),
+    ('ȝ', 'ȝ'),
+    ('ȟ', 'ȟ'),
+    ('ȡ', 'ȡ'),
+    ('ȣ', 'ȣ'),
+    ('ȥ', 'ȥ'),
+    ('ȧ', 'ȧ'),
+    ('ȩ', 'ȩ'),
+    ('ȫ', 'ȫ'),
+    ('ȭ', 'ȭ'),
+    ('ȯ', 'ȯ'),
+    ('ȱ', 'ȱ'),
+    ('ȳ', 'ȹ'),
+    ('ȼ', 'ȼ'),
+    ('ȿ', 'ɀ'),
+    ('ɂ', 'ɂ'),
+    ('ɇ', 'ɇ'),
+    ('ɉ', 'ɉ'),
+    ('ɋ', 'ɋ'),
+    ('ɍ', 'ɍ'),
+    ('ɏ', 'ʓ'),
+    ('ʕ', 'ʸ'),
+    ('ˀ', 'ˁ'),
+    ('ˠ', 'ˤ'),
+    ('ͱ', 'ͱ'),
+    ('ͳ', 'ͳ'),
+    ('ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('ΐ', 'ΐ'),
+    ('ά', 'ώ'),
+    ('ϐ', 'ϑ'),
+    ('ϕ', 'ϗ'),
+    ('ϙ', 'ϙ'),
+    ('ϛ', 'ϛ'),
+    ('ϝ', 'ϝ'),
+    ('ϟ', 'ϟ'),
+    ('ϡ', 'ϡ'),
+    ('ϣ', 'ϣ'),
+    ('ϥ', 'ϥ'),
+    ('ϧ', 'ϧ'),
+    ('ϩ', 'ϩ'),
+    ('ϫ', 'ϫ'),
+    ('ϭ', 'ϭ'),
+    ('ϯ', 'ϳ'),
+    ('ϵ', 'ϵ'),
+    ('ϸ', 'ϸ'),
+    ('ϻ', 'ϼ'),
+    ('а', 'џ'),
+    ('ѡ', 'ѡ'),
+    ('ѣ', 'ѣ'),
+    ('ѥ', 'ѥ'),
+    ('ѧ', 'ѧ'),
+    ('ѩ', 'ѩ'),
+    ('ѫ', 'ѫ'),
+    ('ѭ', 'ѭ'),
+    ('ѯ', 'ѯ'),
+    ('ѱ', 'ѱ'),
+    ('ѳ', 'ѳ'),
+    ('ѵ', 'ѵ'),
+    ('ѷ', 'ѷ'),
+    ('ѹ', 'ѹ'),
+    ('ѻ', 'ѻ'),
+    ('ѽ', 'ѽ'),
+    ('ѿ', 'ѿ'),
+    ('ҁ', 'ҁ'),
+    ('ҋ', 'ҋ'),
+    ('ҍ', 'ҍ'),
+    ('ҏ', 'ҏ'),
+    ('ґ', 'ґ'),
+    ('ғ', 'ғ'),
+    ('ҕ', 'ҕ'),
+    ('җ', 'җ'),
+    ('ҙ', 'ҙ'),
+    ('қ', 'қ'),
+    ('ҝ', 'ҝ'),
+    ('ҟ', 'ҟ'),
+    ('ҡ', 'ҡ'),
+    ('ң', 'ң'),
+    ('ҥ', 'ҥ'),
+    ('ҧ', 'ҧ'),
+    ('ҩ', 'ҩ'),
+    ('ҫ', 'ҫ'),
+    ('ҭ', 'ҭ'),
+    ('ү', 'ү'),
+    ('ұ', 'ұ'),
+    ('ҳ', 'ҳ'),
+    ('ҵ', 'ҵ'),
+    ('ҷ', 'ҷ'),
+    ('ҹ', 'ҹ'),
+    ('һ', 'һ'),
+    ('ҽ', 'ҽ'),
+    ('ҿ', 'ҿ'),
+    ('ӂ', 'ӂ'),
+    ('ӄ', 'ӄ'),
+    ('ӆ', 'ӆ'),
+    ('ӈ', 'ӈ'),
+    ('ӊ', 'ӊ'),
+    ('ӌ', 'ӌ'),
+    ('ӎ', 'ӏ'),
+    ('ӑ', 'ӑ'),
+    ('ӓ', 'ӓ'),
+    ('ӕ', 'ӕ'),
+    ('ӗ', 'ӗ'),
+    ('ә', 'ә'),
+    ('ӛ', 'ӛ'),
+    ('ӝ', 'ӝ'),
+    ('ӟ', 'ӟ'),
+    ('ӡ', 'ӡ'),
+    ('ӣ', 'ӣ'),
+    ('ӥ', 'ӥ'),
+    ('ӧ', 'ӧ'),
+    ('ө', 'ө'),
+    ('ӫ', 'ӫ'),
+    ('ӭ', 'ӭ'),
+    ('ӯ', 'ӯ'),
+    ('ӱ', 'ӱ'),
+    ('ӳ', 'ӳ'),
+    ('ӵ', 'ӵ'),
+    ('ӷ', 'ӷ'),
+    ('ӹ', 'ӹ'),
+    ('ӻ', 'ӻ'),
+    ('ӽ', 'ӽ'),
+    ('ӿ', 'ӿ'),
+    ('ԁ', 'ԁ'),
+    ('ԃ', 'ԃ'),
+    ('ԅ', 'ԅ'),
+    ('ԇ', 'ԇ'),
+    ('ԉ', 'ԉ'),
+    ('ԋ', 'ԋ'),
+    ('ԍ', 'ԍ'),
+    ('ԏ', 'ԏ'),
+    ('ԑ', 'ԑ'),
+    ('ԓ', 'ԓ'),
+    ('ԕ', 'ԕ'),
+    ('ԗ', 'ԗ'),
+    ('ԙ', 'ԙ'),
+    ('ԛ', 'ԛ'),
+    ('ԝ', 'ԝ'),
+    ('ԟ', 'ԟ'),
+    ('ԡ', 'ԡ'),
+    ('ԣ', 'ԣ'),
+    ('ԥ', 'ԥ'),
+    ('ԧ', 'ԧ'),
+    ('ԩ', 'ԩ'),
+    ('ԫ', 'ԫ'),
+    ('ԭ', 'ԭ'),
+    ('ԯ', 'ԯ'),
+    ('ՠ', 'ֈ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᲀ', 'ᲈ'),
+    ('ᴀ', 'ᶿ'),
+    ('ḁ', 'ḁ'),
+    ('ḃ', 'ḃ'),
+    ('ḅ', 'ḅ'),
+    ('ḇ', 'ḇ'),
+    ('ḉ', 'ḉ'),
+    ('ḋ', 'ḋ'),
+    ('ḍ', 'ḍ'),
+    ('ḏ', 'ḏ'),
+    ('ḑ', 'ḑ'),
+    ('ḓ', 'ḓ'),
+    ('ḕ', 'ḕ'),
+    ('ḗ', 'ḗ'),
+    ('ḙ', 'ḙ'),
+    ('ḛ', 'ḛ'),
+    ('ḝ', 'ḝ'),
+    ('ḟ', 'ḟ'),
+    ('ḡ', 'ḡ'),
+    ('ḣ', 'ḣ'),
+    ('ḥ', 'ḥ'),
+    ('ḧ', 'ḧ'),
+    ('ḩ', 'ḩ'),
+    ('ḫ', 'ḫ'),
+    ('ḭ', 'ḭ'),
+    ('ḯ', 'ḯ'),
+    ('ḱ', 'ḱ'),
+    ('ḳ', 'ḳ'),
+    ('ḵ', 'ḵ'),
+    ('ḷ', 'ḷ'),
+    ('ḹ', 'ḹ'),
+    ('ḻ', 'ḻ'),
+    ('ḽ', 'ḽ'),
+    ('ḿ', 'ḿ'),
+    ('ṁ', 'ṁ'),
+    ('ṃ', 'ṃ'),
+    ('ṅ', 'ṅ'),
+    ('ṇ', 'ṇ'),
+    ('ṉ', 'ṉ'),
+    ('ṋ', 'ṋ'),
+    ('ṍ', 'ṍ'),
+    ('ṏ', 'ṏ'),
+    ('ṑ', 'ṑ'),
+    ('ṓ', 'ṓ'),
+    ('ṕ', 'ṕ'),
+    ('ṗ', 'ṗ'),
+    ('ṙ', 'ṙ'),
+    ('ṛ', 'ṛ'),
+    ('ṝ', 'ṝ'),
+    ('ṟ', 'ṟ'),
+    ('ṡ', 'ṡ'),
+    ('ṣ', 'ṣ'),
+    ('ṥ', 'ṥ'),
+    ('ṧ', 'ṧ'),
+    ('ṩ', 'ṩ'),
+    ('ṫ', 'ṫ'),
+    ('ṭ', 'ṭ'),
+    ('ṯ', 'ṯ'),
+    ('ṱ', 'ṱ'),
+    ('ṳ', 'ṳ'),
+    ('ṵ', 'ṵ'),
+    ('ṷ', 'ṷ'),
+    ('ṹ', 'ṹ'),
+    ('ṻ', 'ṻ'),
+    ('ṽ', 'ṽ'),
+    ('ṿ', 'ṿ'),
+    ('ẁ', 'ẁ'),
+    ('ẃ', 'ẃ'),
+    ('ẅ', 'ẅ'),
+    ('ẇ', 'ẇ'),
+    ('ẉ', 'ẉ'),
+    ('ẋ', 'ẋ'),
+    ('ẍ', 'ẍ'),
+    ('ẏ', 'ẏ'),
+    ('ẑ', 'ẑ'),
+    ('ẓ', 'ẓ'),
+    ('ẕ', 'ẝ'),
+    ('ẟ', 'ẟ'),
+    ('ạ', 'ạ'),
+    ('ả', 'ả'),
+    ('ấ', 'ấ'),
+    ('ầ', 'ầ'),
+    ('ẩ', 'ẩ'),
+    ('ẫ', 'ẫ'),
+    ('ậ', 'ậ'),
+    ('ắ', 'ắ'),
+    ('ằ', 'ằ'),
+    ('ẳ', 'ẳ'),
+    ('ẵ', 'ẵ'),
+    ('ặ', 'ặ'),
+    ('ẹ', 'ẹ'),
+    ('ẻ', 'ẻ'),
+    ('ẽ', 'ẽ'),
+    ('ế', 'ế'),
+    ('ề', 'ề'),
+    ('ể', 'ể'),
+    ('ễ', 'ễ'),
+    ('ệ', 'ệ'),
+    ('ỉ', 'ỉ'),
+    ('ị', 'ị'),
+    ('ọ', 'ọ'),
+    ('ỏ', 'ỏ'),
+    ('ố', 'ố'),
+    ('ồ', 'ồ'),
+    ('ổ', 'ổ'),
+    ('ỗ', 'ỗ'),
+    ('ộ', 'ộ'),
+    ('ớ', 'ớ'),
+    ('ờ', 'ờ'),
+    ('ở', 'ở'),
+    ('ỡ', 'ỡ'),
+    ('ợ', 'ợ'),
+    ('ụ', 'ụ'),
+    ('ủ', 'ủ'),
+    ('ứ', 'ứ'),
+    ('ừ', 'ừ'),
+    ('ử', 'ử'),
+    ('ữ', 'ữ'),
+    ('ự', 'ự'),
+    ('ỳ', 'ỳ'),
+    ('ỵ', 'ỵ'),
+    ('ỷ', 'ỷ'),
+    ('ỹ', 'ỹ'),
+    ('ỻ', 'ỻ'),
+    ('ỽ', 'ỽ'),
+    ('ỿ', 'ἇ'),
+    ('ἐ', 'ἕ'),
+    ('ἠ', 'ἧ'),
+    ('ἰ', 'ἷ'),
+    ('ὀ', 'ὅ'),
+    ('ὐ', 'ὗ'),
+    ('ὠ', 'ὧ'),
+    ('ὰ', 'ώ'),
+    ('ᾀ', 'ᾇ'),
+    ('ᾐ', 'ᾗ'),
+    ('ᾠ', 'ᾧ'),
+    ('ᾰ', 'ᾴ'),
+    ('ᾶ', 'ᾷ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῇ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'ῗ'),
+    ('ῠ', 'ῧ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῷ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℊ', 'ℊ'),
+    ('ℎ', 'ℏ'),
+    ('ℓ', 'ℓ'),
+    ('ℯ', 'ℯ'),
+    ('ℴ', 'ℴ'),
+    ('ℹ', 'ℹ'),
+    ('ℼ', 'ℽ'),
+    ('ⅆ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('ⅰ', 'ⅿ'),
+    ('ↄ', 'ↄ'),
+    ('ⓐ', 'ⓩ'),
+    ('ⰰ', 'ⱞ'),
+    ('ⱡ', 'ⱡ'),
+    ('ⱥ', 'ⱦ'),
+    ('ⱨ', 'ⱨ'),
+    ('ⱪ', 'ⱪ'),
+    ('ⱬ', 'ⱬ'),
+    ('ⱱ', 'ⱱ'),
+    ('ⱳ', 'ⱴ'),
+    ('ⱶ', 'ⱽ'),
+    ('ⲁ', 'ⲁ'),
+    ('ⲃ', 'ⲃ'),
+    ('ⲅ', 'ⲅ'),
+    ('ⲇ', 'ⲇ'),
+    ('ⲉ', 'ⲉ'),
+    ('ⲋ', 'ⲋ'),
+    ('ⲍ', 'ⲍ'),
+    ('ⲏ', 'ⲏ'),
+    ('ⲑ', 'ⲑ'),
+    ('ⲓ', 'ⲓ'),
+    ('ⲕ', 'ⲕ'),
+    ('ⲗ', 'ⲗ'),
+    ('ⲙ', 'ⲙ'),
+    ('ⲛ', 'ⲛ'),
+    ('ⲝ', 'ⲝ'),
+    ('ⲟ', 'ⲟ'),
+    ('ⲡ', 'ⲡ'),
+    ('ⲣ', 'ⲣ'),
+    ('ⲥ', 'ⲥ'),
+    ('ⲧ', 'ⲧ'),
+    ('ⲩ', 'ⲩ'),
+    ('ⲫ', 'ⲫ'),
+    ('ⲭ', 'ⲭ'),
+    ('ⲯ', 'ⲯ'),
+    ('ⲱ', 'ⲱ'),
+    ('ⲳ', 'ⲳ'),
+    ('ⲵ', 'ⲵ'),
+    ('ⲷ', 'ⲷ'),
+    ('ⲹ', 'ⲹ'),
+    ('ⲻ', 'ⲻ'),
+    ('ⲽ', 'ⲽ'),
+    ('ⲿ', 'ⲿ'),
+    ('ⳁ', 'ⳁ'),
+    ('ⳃ', 'ⳃ'),
+    ('ⳅ', 'ⳅ'),
+    ('ⳇ', 'ⳇ'),
+    ('ⳉ', 'ⳉ'),
+    ('ⳋ', 'ⳋ'),
+    ('ⳍ', 'ⳍ'),
+    ('ⳏ', 'ⳏ'),
+    ('ⳑ', 'ⳑ'),
+    ('ⳓ', 'ⳓ'),
+    ('ⳕ', 'ⳕ'),
+    ('ⳗ', 'ⳗ'),
+    ('ⳙ', 'ⳙ'),
+    ('ⳛ', 'ⳛ'),
+    ('ⳝ', 'ⳝ'),
+    ('ⳟ', 'ⳟ'),
+    ('ⳡ', 'ⳡ'),
+    ('ⳣ', 'ⳤ'),
+    ('ⳬ', 'ⳬ'),
+    ('ⳮ', 'ⳮ'),
+    ('ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ꙁ', 'ꙁ'),
+    ('ꙃ', 'ꙃ'),
+    ('ꙅ', 'ꙅ'),
+    ('ꙇ', 'ꙇ'),
+    ('ꙉ', 'ꙉ'),
+    ('ꙋ', 'ꙋ'),
+    ('ꙍ', 'ꙍ'),
+    ('ꙏ', 'ꙏ'),
+    ('ꙑ', 'ꙑ'),
+    ('ꙓ', 'ꙓ'),
+    ('ꙕ', 'ꙕ'),
+    ('ꙗ', 'ꙗ'),
+    ('ꙙ', 'ꙙ'),
+    ('ꙛ', 'ꙛ'),
+    ('ꙝ', 'ꙝ'),
+    ('ꙟ', 'ꙟ'),
+    ('ꙡ', 'ꙡ'),
+    ('ꙣ', 'ꙣ'),
+    ('ꙥ', 'ꙥ'),
+    ('ꙧ', 'ꙧ'),
+    ('ꙩ', 'ꙩ'),
+    ('ꙫ', 'ꙫ'),
+    ('ꙭ', 'ꙭ'),
+    ('ꚁ', 'ꚁ'),
+    ('ꚃ', 'ꚃ'),
+    ('ꚅ', 'ꚅ'),
+    ('ꚇ', 'ꚇ'),
+    ('ꚉ', 'ꚉ'),
+    ('ꚋ', 'ꚋ'),
+    ('ꚍ', 'ꚍ'),
+    ('ꚏ', 'ꚏ'),
+    ('ꚑ', 'ꚑ'),
+    ('ꚓ', 'ꚓ'),
+    ('ꚕ', 'ꚕ'),
+    ('ꚗ', 'ꚗ'),
+    ('ꚙ', 'ꚙ'),
+    ('ꚛ', 'ꚝ'),
+    ('ꜣ', 'ꜣ'),
+    ('ꜥ', 'ꜥ'),
+    ('ꜧ', 'ꜧ'),
+    ('ꜩ', 'ꜩ'),
+    ('ꜫ', 'ꜫ'),
+    ('ꜭ', 'ꜭ'),
+    ('ꜯ', 'ꜱ'),
+    ('ꜳ', 'ꜳ'),
+    ('ꜵ', 'ꜵ'),
+    ('ꜷ', 'ꜷ'),
+    ('ꜹ', 'ꜹ'),
+    ('ꜻ', 'ꜻ'),
+    ('ꜽ', 'ꜽ'),
+    ('ꜿ', 'ꜿ'),
+    ('ꝁ', 'ꝁ'),
+    ('ꝃ', 'ꝃ'),
+    ('ꝅ', 'ꝅ'),
+    ('ꝇ', 'ꝇ'),
+    ('ꝉ', 'ꝉ'),
+    ('ꝋ', 'ꝋ'),
+    ('ꝍ', 'ꝍ'),
+    ('ꝏ', 'ꝏ'),
+    ('ꝑ', 'ꝑ'),
+    ('ꝓ', 'ꝓ'),
+    ('ꝕ', 'ꝕ'),
+    ('ꝗ', 'ꝗ'),
+    ('ꝙ', 'ꝙ'),
+    ('ꝛ', 'ꝛ'),
+    ('ꝝ', 'ꝝ'),
+    ('ꝟ', 'ꝟ'),
+    ('ꝡ', 'ꝡ'),
+    ('ꝣ', 'ꝣ'),
+    ('ꝥ', 'ꝥ'),
+    ('ꝧ', 'ꝧ'),
+    ('ꝩ', 'ꝩ'),
+    ('ꝫ', 'ꝫ'),
+    ('ꝭ', 'ꝭ'),
+    ('ꝯ', 'ꝸ'),
+    ('ꝺ', 'ꝺ'),
+    ('ꝼ', 'ꝼ'),
+    ('ꝿ', 'ꝿ'),
+    ('ꞁ', 'ꞁ'),
+    ('ꞃ', 'ꞃ'),
+    ('ꞅ', 'ꞅ'),
+    ('ꞇ', 'ꞇ'),
+    ('ꞌ', 'ꞌ'),
+    ('ꞎ', 'ꞎ'),
+    ('ꞑ', 'ꞑ'),
+    ('ꞓ', 'ꞕ'),
+    ('ꞗ', 'ꞗ'),
+    ('ꞙ', 'ꞙ'),
+    ('ꞛ', 'ꞛ'),
+    ('ꞝ', 'ꞝ'),
+    ('ꞟ', 'ꞟ'),
+    ('ꞡ', 'ꞡ'),
+    ('ꞣ', 'ꞣ'),
+    ('ꞥ', 'ꞥ'),
+    ('ꞧ', 'ꞧ'),
+    ('ꞩ', 'ꞩ'),
+    ('ꞯ', 'ꞯ'),
+    ('ꞵ', 'ꞵ'),
+    ('ꞷ', 'ꞷ'),
+    ('ꞹ', 'ꞹ'),
+    ('ꞻ', 'ꞻ'),
+    ('ꞽ', 'ꞽ'),
+    ('ꞿ', 'ꞿ'),
+    ('ꟃ', 'ꟃ'),
+    ('\u{a7c8}', '\u{a7c8}'),
+    ('\u{a7ca}', '\u{a7ca}'),
+    ('\u{a7f6}', '\u{a7f6}'),
+    ('ꟸ', 'ꟺ'),
+    ('ꬰ', 'ꭚ'),
+    ('ꭜ', '\u{ab68}'),
+    ('ꭰ', 'ꮿ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('a', 'z'),
+    ('𐐨', '𐑏'),
+    ('𐓘', '𐓻'),
+    ('𐳀', '𐳲'),
+    ('𑣀', '𑣟'),
+    ('𖹠', '𖹿'),
+    ('𝐚', '𝐳'),
+    ('𝑎', '𝑔'),
+    ('𝑖', '𝑧'),
+    ('𝒂', '𝒛'),
+    ('𝒶', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝓏'),
+    ('𝓪', '𝔃'),
+    ('𝔞', '𝔷'),
+    ('𝕒', '𝕫'),
+    ('𝖆', '𝖟'),
+    ('𝖺', '𝗓'),
+    ('𝗮', '𝘇'),
+    ('𝘢', '𝘻'),
+    ('𝙖', '𝙯'),
+    ('𝚊', '𝚥'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛡'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜛'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝕'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞏'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟉'),
+    ('𝟋', '𝟋'),
+    ('𞤢', '𞥃'),
+];
+
+pub const NUMERIC: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('٠', '٩'),
+    ('٫', '٬'),
+    ('۰', '۹'),
+    ('߀', '߉'),
+    ('०', '९'),
+    ('০', '৯'),
+    ('੦', '੯'),
+    ('૦', '૯'),
+    ('୦', '୯'),
+    ('௦', '௯'),
+    ('౦', '౯'),
+    ('೦', '೯'),
+    ('൦', '൯'),
+    ('෦', '෯'),
+    ('๐', '๙'),
+    ('໐', '໙'),
+    ('༠', '༩'),
+    ('၀', '၉'),
+    ('႐', '႙'),
+    ('០', '៩'),
+    ('᠐', '᠙'),
+    ('᥆', '᥏'),
+    ('᧐', '᧙'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᭐', '᭙'),
+    ('᮰', '᮹'),
+    ('᱀', '᱉'),
+    ('᱐', '᱙'),
+    ('꘠', '꘩'),
+    ('꣐', '꣙'),
+    ('꤀', '꤉'),
+    ('꧐', '꧙'),
+    ('꧰', '꧹'),
+    ('꩐', '꩙'),
+    ('꯰', '꯹'),
+    ('0', '9'),
+    ('𐒠', '𐒩'),
+    ('𐴰', '𐴹'),
+    ('𑁦', '𑁯'),
+    ('𑃰', '𑃹'),
+    ('𑄶', '𑄿'),
+    ('𑇐', '𑇙'),
+    ('𑋰', '𑋹'),
+    ('𑑐', '𑑙'),
+    ('𑓐', '𑓙'),
+    ('𑙐', '𑙙'),
+    ('𑛀', '𑛉'),
+    ('𑜰', '𑜹'),
+    ('𑣠', '𑣩'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑱐', '𑱙'),
+    ('𑵐', '𑵙'),
+    ('𑶠', '𑶩'),
+    ('𖩠', '𖩩'),
+    ('𖭐', '𖭙'),
+    ('𝟎', '𝟿'),
+    ('𞅀', '𞅉'),
+    ('𞋰', '𞋹'),
+    ('𞥐', '𞥙'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const OLETTER: &'static [(char, char)] = &[
+    ('ƻ', 'ƻ'),
+    ('ǀ', 'ǃ'),
+    ('ʔ', 'ʔ'),
+    ('ʹ', 'ʿ'),
+    ('ˆ', 'ˑ'),
+    ('ˬ', 'ˬ'),
+    ('ˮ', 'ˮ'),
+    ('ʹ', 'ʹ'),
+    ('ՙ', 'ՙ'),
+    ('א', 'ת'),
+    ('ׯ', '׳'),
+    ('ؠ', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۥ', 'ۦ'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॱ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ก', 'ะ'),
+    ('า', 'ำ'),
+    ('เ', 'ๆ'),
+    ('ກ', 'ຂ'),
+    ('ຄ', 'ຄ'),
+    ('ຆ', 'ຊ'),
+    ('ຌ', 'ຣ'),
+    ('ລ', 'ລ'),
+    ('ວ', 'ະ'),
+    ('າ', 'ຳ'),
+    ('ຽ', 'ຽ'),
+    ('ເ', 'ໄ'),
+    ('ໆ', 'ໆ'),
+    ('ໜ', 'ໟ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('က', 'ဪ'),
+    ('ဿ', 'ဿ'),
+    ('ၐ', 'ၕ'),
+    ('ၚ', 'ၝ'),
+    ('ၡ', 'ၡ'),
+    ('ၥ', 'ၦ'),
+    ('ၮ', 'ၰ'),
+    ('ၵ', 'ႁ'),
+    ('ႎ', 'ႎ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ក', 'ឳ'),
+    ('ៗ', 'ៗ'),
+    ('ៜ', 'ៜ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢄ'),
+    ('ᢇ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᥐ', 'ᥭ'),
+    ('ᥰ', 'ᥴ'),
+    ('ᦀ', 'ᦫ'),
+    ('ᦰ', 'ᧉ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᨠ', 'ᩔ'),
+    ('ᪧ', 'ᪧ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ℵ', 'ℸ'),
+    ('ↀ', 'ↂ'),
+    ('ↅ', 'ↈ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '〇'),
+    ('〡', '〩'),
+    ('〱', '〵'),
+    ('〸', '〼'),
+    ('ぁ', 'ゖ'),
+    ('ゝ', 'ゟ'),
+    ('ァ', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ㇰ', 'ㇿ'),
+    ('㐀', '\u{4dbf}'),
+    ('一', '\u{9ffc}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('ꙮ', 'ꙮ'),
+    ('ꙿ', 'ꙿ'),
+    ('ꚠ', 'ꛯ'),
+    ('ꜗ', 'ꜟ'),
+    ('ꞈ', 'ꞈ'),
+    ('ꞏ', 'ꞏ'),
+    ('ꟷ', 'ꟷ'),
+    ('ꟻ', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꧠ', 'ꧤ'),
+    ('ꧦ', 'ꧯ'),
+    ('ꧺ', 'ꧾ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꩠ', 'ꩶ'),
+    ('ꩺ', 'ꩺ'),
+    ('ꩾ', 'ꪯ'),
+    ('ꪱ', 'ꪱ'),
+    ('ꪵ', 'ꪶ'),
+    ('ꪹ', 'ꪽ'),
+    ('ꫀ', 'ꫀ'),
+    ('ꫂ', 'ꫂ'),
+    ('ꫛ', 'ꫝ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫴ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('\u{ab69}', '\u{ab69}'),
+    ('ꯀ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('豈', '舘'),
+    ('並', '龎'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('ヲ', 'ン'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐑐', '𐒝'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑜀', '𑜚'),
+    ('𑠀', '𑠫'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𗀀', '𘟷'),
+    ('𘠀', '\u{18cd5}'),
+    ('\u{18d00}', '\u{18d08}'),
+    ('𛀀', '𛄞'),
+    ('𛅐', '𛅒'),
+    ('𛅤', '𛅧'),
+    ('𛅰', '𛋻'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('𠀀', '\u{2a6dd}'),
+    ('𪜀', '𫜴'),
+    ('𫝀', '𫠝'),
+    ('𫠠', '𬺡'),
+    ('𬺰', '𮯠'),
+    ('丽', '𪘀'),
+    ('\u{30000}', '\u{3134a}'),
+];
+
+pub const SCONTINUE: &'static [(char, char)] = &[
+    (',', '-'),
+    (':', ':'),
+    ('՝', '՝'),
+    ('،', '؍'),
+    ('߸', '߸'),
+    ('᠂', '᠂'),
+    ('᠈', '᠈'),
+    ('–', '—'),
+    ('、', '、'),
+    ('︐', '︑'),
+    ('︓', '︓'),
+    ('︱', '︲'),
+    ('﹐', '﹑'),
+    ('﹕', '﹕'),
+    ('﹘', '﹘'),
+    ('﹣', '﹣'),
+    (',', '-'),
+    (':', ':'),
+    ('、', '、'),
+];
+
+pub const STERM: &'static [(char, char)] = &[
+    ('!', '!'),
+    ('?', '?'),
+    ('։', '։'),
+    ('؞', '؟'),
+    ('۔', '۔'),
+    ('܀', '܂'),
+    ('߹', '߹'),
+    ('࠷', '࠷'),
+    ('࠹', '࠹'),
+    ('࠽', '࠾'),
+    ('।', '॥'),
+    ('၊', '။'),
+    ('።', '።'),
+    ('፧', '፨'),
+    ('᙮', '᙮'),
+    ('᜵', '᜶'),
+    ('᠃', '᠃'),
+    ('᠉', '᠉'),
+    ('᥄', '᥅'),
+    ('᪨', '᪫'),
+    ('᭚', '᭛'),
+    ('᭞', '᭟'),
+    ('᰻', '᰼'),
+    ('᱾', '᱿'),
+    ('‼', '‽'),
+    ('⁇', '⁉'),
+    ('⸮', '⸮'),
+    ('⸼', '⸼'),
+    ('。', '。'),
+    ('꓿', '꓿'),
+    ('꘎', '꘏'),
+    ('꛳', '꛳'),
+    ('꛷', '꛷'),
+    ('꡶', '꡷'),
+    ('꣎', '꣏'),
+    ('꤯', '꤯'),
+    ('꧈', '꧉'),
+    ('꩝', '꩟'),
+    ('꫰', '꫱'),
+    ('꯫', '꯫'),
+    ('﹖', '﹗'),
+    ('!', '!'),
+    ('?', '?'),
+    ('。', '。'),
+    ('𐩖', '𐩗'),
+    ('𐽕', '𐽙'),
+    ('𑁇', '𑁈'),
+    ('𑂾', '𑃁'),
+    ('𑅁', '𑅃'),
+    ('𑇅', '𑇆'),
+    ('𑇍', '𑇍'),
+    ('𑇞', '𑇟'),
+    ('𑈸', '𑈹'),
+    ('𑈻', '𑈼'),
+    ('𑊩', '𑊩'),
+    ('𑑋', '𑑌'),
+    ('𑗂', '𑗃'),
+    ('𑗉', '𑗗'),
+    ('𑙁', '𑙂'),
+    ('𑜼', '𑜾'),
+    ('\u{11944}', '\u{11944}'),
+    ('\u{11946}', '\u{11946}'),
+    ('𑩂', '𑩃'),
+    ('𑪛', '𑪜'),
+    ('𑱁', '𑱂'),
+    ('𑻷', '𑻸'),
+    ('𖩮', '𖩯'),
+    ('𖫵', '𖫵'),
+    ('𖬷', '𖬸'),
+    ('𖭄', '𖭄'),
+    ('𖺘', '𖺘'),
+    ('𛲟', '𛲟'),
+    ('𝪈', '𝪈'),
+];
+
+pub const SEP: &'static [(char, char)] =
+    &[('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
+
+pub const SP: &'static [(char, char)] = &[
+    ('\t', '\t'),
+    ('\u{b}', '\u{c}'),
+    (' ', ' '),
+    ('\u{a0}', '\u{a0}'),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{200a}'),
+    ('\u{202f}', '\u{202f}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
+
+pub const UPPER: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('À', 'Ö'),
+    ('Ø', 'Þ'),
+    ('Ā', 'Ā'),
+    ('Ă', 'Ă'),
+    ('Ą', 'Ą'),
+    ('Ć', 'Ć'),
+    ('Ĉ', 'Ĉ'),
+    ('Ċ', 'Ċ'),
+    ('Č', 'Č'),
+    ('Ď', 'Ď'),
+    ('Đ', 'Đ'),
+    ('Ē', 'Ē'),
+    ('Ĕ', 'Ĕ'),
+    ('Ė', 'Ė'),
+    ('Ę', 'Ę'),
+    ('Ě', 'Ě'),
+    ('Ĝ', 'Ĝ'),
+    ('Ğ', 'Ğ'),
+    ('Ġ', 'Ġ'),
+    ('Ģ', 'Ģ'),
+    ('Ĥ', 'Ĥ'),
+    ('Ħ', 'Ħ'),
+    ('Ĩ', 'Ĩ'),
+    ('Ī', 'Ī'),
+    ('Ĭ', 'Ĭ'),
+    ('Į', 'Į'),
+    ('İ', 'İ'),
+    ('IJ', 'IJ'),
+    ('Ĵ', 'Ĵ'),
+    ('Ķ', 'Ķ'),
+    ('Ĺ', 'Ĺ'),
+    ('Ļ', 'Ļ'),
+    ('Ľ', 'Ľ'),
+    ('Ŀ', 'Ŀ'),
+    ('Ł', 'Ł'),
+    ('Ń', 'Ń'),
+    ('Ņ', 'Ņ'),
+    ('Ň', 'Ň'),
+    ('Ŋ', 'Ŋ'),
+    ('Ō', 'Ō'),
+    ('Ŏ', 'Ŏ'),
+    ('Ő', 'Ő'),
+    ('Œ', 'Œ'),
+    ('Ŕ', 'Ŕ'),
+    ('Ŗ', 'Ŗ'),
+    ('Ř', 'Ř'),
+    ('Ś', 'Ś'),
+    ('Ŝ', 'Ŝ'),
+    ('Ş', 'Ş'),
+    ('Š', 'Š'),
+    ('Ţ', 'Ţ'),
+    ('Ť', 'Ť'),
+    ('Ŧ', 'Ŧ'),
+    ('Ũ', 'Ũ'),
+    ('Ū', 'Ū'),
+    ('Ŭ', 'Ŭ'),
+    ('Ů', 'Ů'),
+    ('Ű', 'Ű'),
+    ('Ų', 'Ų'),
+    ('Ŵ', 'Ŵ'),
+    ('Ŷ', 'Ŷ'),
+    ('Ÿ', 'Ź'),
+    ('Ż', 'Ż'),
+    ('Ž', 'Ž'),
+    ('Ɓ', 'Ƃ'),
+    ('Ƅ', 'Ƅ'),
+    ('Ɔ', 'Ƈ'),
+    ('Ɖ', 'Ƌ'),
+    ('Ǝ', 'Ƒ'),
+    ('Ɠ', 'Ɣ'),
+    ('Ɩ', 'Ƙ'),
+    ('Ɯ', 'Ɲ'),
+    ('Ɵ', 'Ơ'),
+    ('Ƣ', 'Ƣ'),
+    ('Ƥ', 'Ƥ'),
+    ('Ʀ', 'Ƨ'),
+    ('Ʃ', 'Ʃ'),
+    ('Ƭ', 'Ƭ'),
+    ('Ʈ', 'Ư'),
+    ('Ʊ', 'Ƴ'),
+    ('Ƶ', 'Ƶ'),
+    ('Ʒ', 'Ƹ'),
+    ('Ƽ', 'Ƽ'),
+    ('DŽ', 'Dž'),
+    ('LJ', 'Lj'),
+    ('NJ', 'Nj'),
+    ('Ǎ', 'Ǎ'),
+    ('Ǐ', 'Ǐ'),
+    ('Ǒ', 'Ǒ'),
+    ('Ǔ', 'Ǔ'),
+    ('Ǖ', 'Ǖ'),
+    ('Ǘ', 'Ǘ'),
+    ('Ǚ', 'Ǚ'),
+    ('Ǜ', 'Ǜ'),
+    ('Ǟ', 'Ǟ'),
+    ('Ǡ', 'Ǡ'),
+    ('Ǣ', 'Ǣ'),
+    ('Ǥ', 'Ǥ'),
+    ('Ǧ', 'Ǧ'),
+    ('Ǩ', 'Ǩ'),
+    ('Ǫ', 'Ǫ'),
+    ('Ǭ', 'Ǭ'),
+    ('Ǯ', 'Ǯ'),
+    ('DZ', 'Dz'),
+    ('Ǵ', 'Ǵ'),
+    ('Ƕ', 'Ǹ'),
+    ('Ǻ', 'Ǻ'),
+    ('Ǽ', 'Ǽ'),
+    ('Ǿ', 'Ǿ'),
+    ('Ȁ', 'Ȁ'),
+    ('Ȃ', 'Ȃ'),
+    ('Ȅ', 'Ȅ'),
+    ('Ȇ', 'Ȇ'),
+    ('Ȉ', 'Ȉ'),
+    ('Ȋ', 'Ȋ'),
+    ('Ȍ', 'Ȍ'),
+    ('Ȏ', 'Ȏ'),
+    ('Ȑ', 'Ȑ'),
+    ('Ȓ', 'Ȓ'),
+    ('Ȕ', 'Ȕ'),
+    ('Ȗ', 'Ȗ'),
+    ('Ș', 'Ș'),
+    ('Ț', 'Ț'),
+    ('Ȝ', 'Ȝ'),
+    ('Ȟ', 'Ȟ'),
+    ('Ƞ', 'Ƞ'),
+    ('Ȣ', 'Ȣ'),
+    ('Ȥ', 'Ȥ'),
+    ('Ȧ', 'Ȧ'),
+    ('Ȩ', 'Ȩ'),
+    ('Ȫ', 'Ȫ'),
+    ('Ȭ', 'Ȭ'),
+    ('Ȯ', 'Ȯ'),
+    ('Ȱ', 'Ȱ'),
+    ('Ȳ', 'Ȳ'),
+    ('Ⱥ', 'Ȼ'),
+    ('Ƚ', 'Ⱦ'),
+    ('Ɂ', 'Ɂ'),
+    ('Ƀ', 'Ɇ'),
+    ('Ɉ', 'Ɉ'),
+    ('Ɋ', 'Ɋ'),
+    ('Ɍ', 'Ɍ'),
+    ('Ɏ', 'Ɏ'),
+    ('Ͱ', 'Ͱ'),
+    ('Ͳ', 'Ͳ'),
+    ('Ͷ', 'Ͷ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ώ'),
+    ('Α', 'Ρ'),
+    ('Σ', 'Ϋ'),
+    ('Ϗ', 'Ϗ'),
+    ('ϒ', 'ϔ'),
+    ('Ϙ', 'Ϙ'),
+    ('Ϛ', 'Ϛ'),
+    ('Ϝ', 'Ϝ'),
+    ('Ϟ', 'Ϟ'),
+    ('Ϡ', 'Ϡ'),
+    ('Ϣ', 'Ϣ'),
+    ('Ϥ', 'Ϥ'),
+    ('Ϧ', 'Ϧ'),
+    ('Ϩ', 'Ϩ'),
+    ('Ϫ', 'Ϫ'),
+    ('Ϭ', 'Ϭ'),
+    ('Ϯ', 'Ϯ'),
+    ('ϴ', 'ϴ'),
+    ('Ϸ', 'Ϸ'),
+    ('Ϲ', 'Ϻ'),
+    ('Ͻ', 'Я'),
+    ('Ѡ', 'Ѡ'),
+    ('Ѣ', 'Ѣ'),
+    ('Ѥ', 'Ѥ'),
+    ('Ѧ', 'Ѧ'),
+    ('Ѩ', 'Ѩ'),
+    ('Ѫ', 'Ѫ'),
+    ('Ѭ', 'Ѭ'),
+    ('Ѯ', 'Ѯ'),
+    ('Ѱ', 'Ѱ'),
+    ('Ѳ', 'Ѳ'),
+    ('Ѵ', 'Ѵ'),
+    ('Ѷ', 'Ѷ'),
+    ('Ѹ', 'Ѹ'),
+    ('Ѻ', 'Ѻ'),
+    ('Ѽ', 'Ѽ'),
+    ('Ѿ', 'Ѿ'),
+    ('Ҁ', 'Ҁ'),
+    ('Ҋ', 'Ҋ'),
+    ('Ҍ', 'Ҍ'),
+    ('Ҏ', 'Ҏ'),
+    ('Ґ', 'Ґ'),
+    ('Ғ', 'Ғ'),
+    ('Ҕ', 'Ҕ'),
+    ('Җ', 'Җ'),
+    ('Ҙ', 'Ҙ'),
+    ('Қ', 'Қ'),
+    ('Ҝ', 'Ҝ'),
+    ('Ҟ', 'Ҟ'),
+    ('Ҡ', 'Ҡ'),
+    ('Ң', 'Ң'),
+    ('Ҥ', 'Ҥ'),
+    ('Ҧ', 'Ҧ'),
+    ('Ҩ', 'Ҩ'),
+    ('Ҫ', 'Ҫ'),
+    ('Ҭ', 'Ҭ'),
+    ('Ү', 'Ү'),
+    ('Ұ', 'Ұ'),
+    ('Ҳ', 'Ҳ'),
+    ('Ҵ', 'Ҵ'),
+    ('Ҷ', 'Ҷ'),
+    ('Ҹ', 'Ҹ'),
+    ('Һ', 'Һ'),
+    ('Ҽ', 'Ҽ'),
+    ('Ҿ', 'Ҿ'),
+    ('Ӏ', 'Ӂ'),
+    ('Ӄ', 'Ӄ'),
+    ('Ӆ', 'Ӆ'),
+    ('Ӈ', 'Ӈ'),
+    ('Ӊ', 'Ӊ'),
+    ('Ӌ', 'Ӌ'),
+    ('Ӎ', 'Ӎ'),
+    ('Ӑ', 'Ӑ'),
+    ('Ӓ', 'Ӓ'),
+    ('Ӕ', 'Ӕ'),
+    ('Ӗ', 'Ӗ'),
+    ('Ә', 'Ә'),
+    ('Ӛ', 'Ӛ'),
+    ('Ӝ', 'Ӝ'),
+    ('Ӟ', 'Ӟ'),
+    ('Ӡ', 'Ӡ'),
+    ('Ӣ', 'Ӣ'),
+    ('Ӥ', 'Ӥ'),
+    ('Ӧ', 'Ӧ'),
+    ('Ө', 'Ө'),
+    ('Ӫ', 'Ӫ'),
+    ('Ӭ', 'Ӭ'),
+    ('Ӯ', 'Ӯ'),
+    ('Ӱ', 'Ӱ'),
+    ('Ӳ', 'Ӳ'),
+    ('Ӵ', 'Ӵ'),
+    ('Ӷ', 'Ӷ'),
+    ('Ӹ', 'Ӹ'),
+    ('Ӻ', 'Ӻ'),
+    ('Ӽ', 'Ӽ'),
+    ('Ӿ', 'Ӿ'),
+    ('Ԁ', 'Ԁ'),
+    ('Ԃ', 'Ԃ'),
+    ('Ԅ', 'Ԅ'),
+    ('Ԇ', 'Ԇ'),
+    ('Ԉ', 'Ԉ'),
+    ('Ԋ', 'Ԋ'),
+    ('Ԍ', 'Ԍ'),
+    ('Ԏ', 'Ԏ'),
+    ('Ԑ', 'Ԑ'),
+    ('Ԓ', 'Ԓ'),
+    ('Ԕ', 'Ԕ'),
+    ('Ԗ', 'Ԗ'),
+    ('Ԙ', 'Ԙ'),
+    ('Ԛ', 'Ԛ'),
+    ('Ԝ', 'Ԝ'),
+    ('Ԟ', 'Ԟ'),
+    ('Ԡ', 'Ԡ'),
+    ('Ԣ', 'Ԣ'),
+    ('Ԥ', 'Ԥ'),
+    ('Ԧ', 'Ԧ'),
+    ('Ԩ', 'Ԩ'),
+    ('Ԫ', 'Ԫ'),
+    ('Ԭ', 'Ԭ'),
+    ('Ԯ', 'Ԯ'),
+    ('Ա', 'Ֆ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('Ḁ', 'Ḁ'),
+    ('Ḃ', 'Ḃ'),
+    ('Ḅ', 'Ḅ'),
+    ('Ḇ', 'Ḇ'),
+    ('Ḉ', 'Ḉ'),
+    ('Ḋ', 'Ḋ'),
+    ('Ḍ', 'Ḍ'),
+    ('Ḏ', 'Ḏ'),
+    ('Ḑ', 'Ḑ'),
+    ('Ḓ', 'Ḓ'),
+    ('Ḕ', 'Ḕ'),
+    ('Ḗ', 'Ḗ'),
+    ('Ḙ', 'Ḙ'),
+    ('Ḛ', 'Ḛ'),
+    ('Ḝ', 'Ḝ'),
+    ('Ḟ', 'Ḟ'),
+    ('Ḡ', 'Ḡ'),
+    ('Ḣ', 'Ḣ'),
+    ('Ḥ', 'Ḥ'),
+    ('Ḧ', 'Ḧ'),
+    ('Ḩ', 'Ḩ'),
+    ('Ḫ', 'Ḫ'),
+    ('Ḭ', 'Ḭ'),
+    ('Ḯ', 'Ḯ'),
+    ('Ḱ', 'Ḱ'),
+    ('Ḳ', 'Ḳ'),
+    ('Ḵ', 'Ḵ'),
+    ('Ḷ', 'Ḷ'),
+    ('Ḹ', 'Ḹ'),
+    ('Ḻ', 'Ḻ'),
+    ('Ḽ', 'Ḽ'),
+    ('Ḿ', 'Ḿ'),
+    ('Ṁ', 'Ṁ'),
+    ('Ṃ', 'Ṃ'),
+    ('Ṅ', 'Ṅ'),
+    ('Ṇ', 'Ṇ'),
+    ('Ṉ', 'Ṉ'),
+    ('Ṋ', 'Ṋ'),
+    ('Ṍ', 'Ṍ'),
+    ('Ṏ', 'Ṏ'),
+    ('Ṑ', 'Ṑ'),
+    ('Ṓ', 'Ṓ'),
+    ('Ṕ', 'Ṕ'),
+    ('Ṗ', 'Ṗ'),
+    ('Ṙ', 'Ṙ'),
+    ('Ṛ', 'Ṛ'),
+    ('Ṝ', 'Ṝ'),
+    ('Ṟ', 'Ṟ'),
+    ('Ṡ', 'Ṡ'),
+    ('Ṣ', 'Ṣ'),
+    ('Ṥ', 'Ṥ'),
+    ('Ṧ', 'Ṧ'),
+    ('Ṩ', 'Ṩ'),
+    ('Ṫ', 'Ṫ'),
+    ('Ṭ', 'Ṭ'),
+    ('Ṯ', 'Ṯ'),
+    ('Ṱ', 'Ṱ'),
+    ('Ṳ', 'Ṳ'),
+    ('Ṵ', 'Ṵ'),
+    ('Ṷ', 'Ṷ'),
+    ('Ṹ', 'Ṹ'),
+    ('Ṻ', 'Ṻ'),
+    ('Ṽ', 'Ṽ'),
+    ('Ṿ', 'Ṿ'),
+    ('Ẁ', 'Ẁ'),
+    ('Ẃ', 'Ẃ'),
+    ('Ẅ', 'Ẅ'),
+    ('Ẇ', 'Ẇ'),
+    ('Ẉ', 'Ẉ'),
+    ('Ẋ', 'Ẋ'),
+    ('Ẍ', 'Ẍ'),
+    ('Ẏ', 'Ẏ'),
+    ('Ẑ', 'Ẑ'),
+    ('Ẓ', 'Ẓ'),
+    ('Ẕ', 'Ẕ'),
+    ('ẞ', 'ẞ'),
+    ('Ạ', 'Ạ'),
+    ('Ả', 'Ả'),
+    ('Ấ', 'Ấ'),
+    ('Ầ', 'Ầ'),
+    ('Ẩ', 'Ẩ'),
+    ('Ẫ', 'Ẫ'),
+    ('Ậ', 'Ậ'),
+    ('Ắ', 'Ắ'),
+    ('Ằ', 'Ằ'),
+    ('Ẳ', 'Ẳ'),
+    ('Ẵ', 'Ẵ'),
+    ('Ặ', 'Ặ'),
+    ('Ẹ', 'Ẹ'),
+    ('Ẻ', 'Ẻ'),
+    ('Ẽ', 'Ẽ'),
+    ('Ế', 'Ế'),
+    ('Ề', 'Ề'),
+    ('Ể', 'Ể'),
+    ('Ễ', 'Ễ'),
+    ('Ệ', 'Ệ'),
+    ('Ỉ', 'Ỉ'),
+    ('Ị', 'Ị'),
+    ('Ọ', 'Ọ'),
+    ('Ỏ', 'Ỏ'),
+    ('Ố', 'Ố'),
+    ('Ồ', 'Ồ'),
+    ('Ổ', 'Ổ'),
+    ('Ỗ', 'Ỗ'),
+    ('Ộ', 'Ộ'),
+    ('Ớ', 'Ớ'),
+    ('Ờ', 'Ờ'),
+    ('Ở', 'Ở'),
+    ('Ỡ', 'Ỡ'),
+    ('Ợ', 'Ợ'),
+    ('Ụ', 'Ụ'),
+    ('Ủ', 'Ủ'),
+    ('Ứ', 'Ứ'),
+    ('Ừ', 'Ừ'),
+    ('Ử', 'Ử'),
+    ('Ữ', 'Ữ'),
+    ('Ự', 'Ự'),
+    ('Ỳ', 'Ỳ'),
+    ('Ỵ', 'Ỵ'),
+    ('Ỷ', 'Ỷ'),
+    ('Ỹ', 'Ỹ'),
+    ('Ỻ', 'Ỻ'),
+    ('Ỽ', 'Ỽ'),
+    ('Ỿ', 'Ỿ'),
+    ('Ἀ', 'Ἇ'),
+    ('Ἐ', 'Ἕ'),
+    ('Ἠ', 'Ἧ'),
+    ('Ἰ', 'Ἷ'),
+    ('Ὀ', 'Ὅ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'Ὗ'),
+    ('Ὠ', 'Ὧ'),
+    ('ᾈ', 'ᾏ'),
+    ('ᾘ', 'ᾟ'),
+    ('ᾨ', 'ᾯ'),
+    ('Ᾰ', 'ᾼ'),
+    ('Ὲ', 'ῌ'),
+    ('Ῐ', 'Ί'),
+    ('Ῠ', 'Ῥ'),
+    ('Ὸ', 'ῼ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℋ', 'ℍ'),
+    ('ℐ', 'ℒ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℰ', 'ℳ'),
+    ('ℾ', 'ℿ'),
+    ('ⅅ', 'ⅅ'),
+    ('Ⅰ', 'Ⅿ'),
+    ('Ↄ', 'Ↄ'),
+    ('Ⓐ', 'Ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('Ⱡ', 'Ⱡ'),
+    ('Ɫ', 'Ɽ'),
+    ('Ⱨ', 'Ⱨ'),
+    ('Ⱪ', 'Ⱪ'),
+    ('Ⱬ', 'Ⱬ'),
+    ('Ɑ', 'Ɒ'),
+    ('Ⱳ', 'Ⱳ'),
+    ('Ⱶ', 'Ⱶ'),
+    ('Ȿ', 'Ⲁ'),
+    ('Ⲃ', 'Ⲃ'),
+    ('Ⲅ', 'Ⲅ'),
+    ('Ⲇ', 'Ⲇ'),
+    ('Ⲉ', 'Ⲉ'),
+    ('Ⲋ', 'Ⲋ'),
+    ('Ⲍ', 'Ⲍ'),
+    ('Ⲏ', 'Ⲏ'),
+    ('Ⲑ', 'Ⲑ'),
+    ('Ⲓ', 'Ⲓ'),
+    ('Ⲕ', 'Ⲕ'),
+    ('Ⲗ', 'Ⲗ'),
+    ('Ⲙ', 'Ⲙ'),
+    ('Ⲛ', 'Ⲛ'),
+    ('Ⲝ', 'Ⲝ'),
+    ('Ⲟ', 'Ⲟ'),
+    ('Ⲡ', 'Ⲡ'),
+    ('Ⲣ', 'Ⲣ'),
+    ('Ⲥ', 'Ⲥ'),
+    ('Ⲧ', 'Ⲧ'),
+    ('Ⲩ', 'Ⲩ'),
+    ('Ⲫ', 'Ⲫ'),
+    ('Ⲭ', 'Ⲭ'),
+    ('Ⲯ', 'Ⲯ'),
+    ('Ⲱ', 'Ⲱ'),
+    ('Ⲳ', 'Ⲳ'),
+    ('Ⲵ', 'Ⲵ'),
+    ('Ⲷ', 'Ⲷ'),
+    ('Ⲹ', 'Ⲹ'),
+    ('Ⲻ', 'Ⲻ'),
+    ('Ⲽ', 'Ⲽ'),
+    ('Ⲿ', 'Ⲿ'),
+    ('Ⳁ', 'Ⳁ'),
+    ('Ⳃ', 'Ⳃ'),
+    ('Ⳅ', 'Ⳅ'),
+    ('Ⳇ', 'Ⳇ'),
+    ('Ⳉ', 'Ⳉ'),
+    ('Ⳋ', 'Ⳋ'),
+    ('Ⳍ', 'Ⳍ'),
+    ('Ⳏ', 'Ⳏ'),
+    ('Ⳑ', 'Ⳑ'),
+    ('Ⳓ', 'Ⳓ'),
+    ('Ⳕ', 'Ⳕ'),
+    ('Ⳗ', 'Ⳗ'),
+    ('Ⳙ', 'Ⳙ'),
+    ('Ⳛ', 'Ⳛ'),
+    ('Ⳝ', 'Ⳝ'),
+    ('Ⳟ', 'Ⳟ'),
+    ('Ⳡ', 'Ⳡ'),
+    ('Ⳣ', 'Ⳣ'),
+    ('Ⳬ', 'Ⳬ'),
+    ('Ⳮ', 'Ⳮ'),
+    ('Ⳳ', 'Ⳳ'),
+    ('Ꙁ', 'Ꙁ'),
+    ('Ꙃ', 'Ꙃ'),
+    ('Ꙅ', 'Ꙅ'),
+    ('Ꙇ', 'Ꙇ'),
+    ('Ꙉ', 'Ꙉ'),
+    ('Ꙋ', 'Ꙋ'),
+    ('Ꙍ', 'Ꙍ'),
+    ('Ꙏ', 'Ꙏ'),
+    ('Ꙑ', 'Ꙑ'),
+    ('Ꙓ', 'Ꙓ'),
+    ('Ꙕ', 'Ꙕ'),
+    ('Ꙗ', 'Ꙗ'),
+    ('Ꙙ', 'Ꙙ'),
+    ('Ꙛ', 'Ꙛ'),
+    ('Ꙝ', 'Ꙝ'),
+    ('Ꙟ', 'Ꙟ'),
+    ('Ꙡ', 'Ꙡ'),
+    ('Ꙣ', 'Ꙣ'),
+    ('Ꙥ', 'Ꙥ'),
+    ('Ꙧ', 'Ꙧ'),
+    ('Ꙩ', 'Ꙩ'),
+    ('Ꙫ', 'Ꙫ'),
+    ('Ꙭ', 'Ꙭ'),
+    ('Ꚁ', 'Ꚁ'),
+    ('Ꚃ', 'Ꚃ'),
+    ('Ꚅ', 'Ꚅ'),
+    ('Ꚇ', 'Ꚇ'),
+    ('Ꚉ', 'Ꚉ'),
+    ('Ꚋ', 'Ꚋ'),
+    ('Ꚍ', 'Ꚍ'),
+    ('Ꚏ', 'Ꚏ'),
+    ('Ꚑ', 'Ꚑ'),
+    ('Ꚓ', 'Ꚓ'),
+    ('Ꚕ', 'Ꚕ'),
+    ('Ꚗ', 'Ꚗ'),
+    ('Ꚙ', 'Ꚙ'),
+    ('Ꚛ', 'Ꚛ'),
+    ('Ꜣ', 'Ꜣ'),
+    ('Ꜥ', 'Ꜥ'),
+    ('Ꜧ', 'Ꜧ'),
+    ('Ꜩ', 'Ꜩ'),
+    ('Ꜫ', 'Ꜫ'),
+    ('Ꜭ', 'Ꜭ'),
+    ('Ꜯ', 'Ꜯ'),
+    ('Ꜳ', 'Ꜳ'),
+    ('Ꜵ', 'Ꜵ'),
+    ('Ꜷ', 'Ꜷ'),
+    ('Ꜹ', 'Ꜹ'),
+    ('Ꜻ', 'Ꜻ'),
+    ('Ꜽ', 'Ꜽ'),
+    ('Ꜿ', 'Ꜿ'),
+    ('Ꝁ', 'Ꝁ'),
+    ('Ꝃ', 'Ꝃ'),
+    ('Ꝅ', 'Ꝅ'),
+    ('Ꝇ', 'Ꝇ'),
+    ('Ꝉ', 'Ꝉ'),
+    ('Ꝋ', 'Ꝋ'),
+    ('Ꝍ', 'Ꝍ'),
+    ('Ꝏ', 'Ꝏ'),
+    ('Ꝑ', 'Ꝑ'),
+    ('Ꝓ', 'Ꝓ'),
+    ('Ꝕ', 'Ꝕ'),
+    ('Ꝗ', 'Ꝗ'),
+    ('Ꝙ', 'Ꝙ'),
+    ('Ꝛ', 'Ꝛ'),
+    ('Ꝝ', 'Ꝝ'),
+    ('Ꝟ', 'Ꝟ'),
+    ('Ꝡ', 'Ꝡ'),
+    ('Ꝣ', 'Ꝣ'),
+    ('Ꝥ', 'Ꝥ'),
+    ('Ꝧ', 'Ꝧ'),
+    ('Ꝩ', 'Ꝩ'),
+    ('Ꝫ', 'Ꝫ'),
+    ('Ꝭ', 'Ꝭ'),
+    ('Ꝯ', 'Ꝯ'),
+    ('Ꝺ', 'Ꝺ'),
+    ('Ꝼ', 'Ꝼ'),
+    ('Ᵹ', 'Ꝿ'),
+    ('Ꞁ', 'Ꞁ'),
+    ('Ꞃ', 'Ꞃ'),
+    ('Ꞅ', 'Ꞅ'),
+    ('Ꞇ', 'Ꞇ'),
+    ('Ꞌ', 'Ꞌ'),
+    ('Ɥ', 'Ɥ'),
+    ('Ꞑ', 'Ꞑ'),
+    ('Ꞓ', 'Ꞓ'),
+    ('Ꞗ', 'Ꞗ'),
+    ('Ꞙ', 'Ꞙ'),
+    ('Ꞛ', 'Ꞛ'),
+    ('Ꞝ', 'Ꞝ'),
+    ('Ꞟ', 'Ꞟ'),
+    ('Ꞡ', 'Ꞡ'),
+    ('Ꞣ', 'Ꞣ'),
+    ('Ꞥ', 'Ꞥ'),
+    ('Ꞧ', 'Ꞧ'),
+    ('Ꞩ', 'Ꞩ'),
+    ('Ɦ', 'Ɪ'),
+    ('Ʞ', 'Ꞵ'),
+    ('Ꞷ', 'Ꞷ'),
+    ('Ꞹ', 'Ꞹ'),
+    ('Ꞻ', 'Ꞻ'),
+    ('Ꞽ', 'Ꞽ'),
+    ('Ꞿ', 'Ꞿ'),
+    ('Ꟃ', 'Ꟃ'),
+    ('Ꞔ', '\u{a7c7}'),
+    ('\u{a7c9}', '\u{a7c9}'),
+    ('\u{a7f5}', '\u{a7f5}'),
+    ('A', 'Z'),
+    ('𐐀', '𐐧'),
+    ('𐒰', '𐓓'),
+    ('𐲀', '𐲲'),
+    ('𑢠', '𑢿'),
+    ('𖹀', '𖹟'),
+    ('𝐀', '𝐙'),
+    ('𝐴', '𝑍'),
+    ('𝑨', '𝒁'),
+    ('𝒜', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒵'),
+    ('𝓐', '𝓩'),
+    ('𝔄', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔸', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕬', '𝖅'),
+    ('𝖠', '𝖹'),
+    ('𝗔', '𝗭'),
+    ('𝘈', '𝘡'),
+    ('𝘼', '𝙕'),
+    ('𝙰', '𝚉'),
+    ('𝚨', '𝛀'),
+    ('𝛢', '𝛺'),
+    ('𝜜', '𝜴'),
+    ('𝝖', '𝝮'),
+    ('𝞐', '𝞨'),
+    ('𝟊', '𝟊'),
+    ('𞤀', '𞤡'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+];
diff --git a/src/unicode_tables/word_break.rs b/src/unicode_tables/word_break.rs
new file mode 100644
index 0000000..5c01342
--- /dev/null
+++ b/src/unicode_tables/word_break.rs
@@ -0,0 +1,1058 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate word-break ucd-13.0.0 --chars
+//
+// ucd-generate 0.2.7 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+    ("ALetter", ALETTER),
+    ("CR", CR),
+    ("Double_Quote", DOUBLE_QUOTE),
+    ("Extend", EXTEND),
+    ("ExtendNumLet", EXTENDNUMLET),
+    ("Format", FORMAT),
+    ("Hebrew_Letter", HEBREW_LETTER),
+    ("Katakana", KATAKANA),
+    ("LF", LF),
+    ("MidLetter", MIDLETTER),
+    ("MidNum", MIDNUM),
+    ("MidNumLet", MIDNUMLET),
+    ("Newline", NEWLINE),
+    ("Numeric", NUMERIC),
+    ("Regional_Indicator", REGIONAL_INDICATOR),
+    ("Single_Quote", SINGLE_QUOTE),
+    ("WSegSpace", WSEGSPACE),
+    ("ZWJ", ZWJ),
+];
+
+pub const ALETTER: &'static [(char, char)] = &[
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ª', 'ª'),
+    ('µ', 'µ'),
+    ('º', 'º'),
+    ('À', 'Ö'),
+    ('Ø', 'ö'),
+    ('ø', '˗'),
+    ('˞', '˿'),
+    ('Ͱ', 'ʹ'),
+    ('Ͷ', 'ͷ'),
+    ('ͺ', 'ͽ'),
+    ('Ϳ', 'Ϳ'),
+    ('Ά', 'Ά'),
+    ('Έ', 'Ί'),
+    ('Ό', 'Ό'),
+    ('Ύ', 'Ρ'),
+    ('Σ', 'ϵ'),
+    ('Ϸ', 'ҁ'),
+    ('Ҋ', 'ԯ'),
+    ('Ա', 'Ֆ'),
+    ('ՙ', '՜'),
+    ('՞', '՞'),
+    ('ՠ', 'ֈ'),
+    ('֊', '֊'),
+    ('׳', '׳'),
+    ('ؠ', 'ي'),
+    ('ٮ', 'ٯ'),
+    ('ٱ', 'ۓ'),
+    ('ە', 'ە'),
+    ('ۥ', 'ۦ'),
+    ('ۮ', 'ۯ'),
+    ('ۺ', 'ۼ'),
+    ('ۿ', 'ۿ'),
+    ('ܐ', 'ܐ'),
+    ('ܒ', 'ܯ'),
+    ('ݍ', 'ޥ'),
+    ('ޱ', 'ޱ'),
+    ('ߊ', 'ߪ'),
+    ('ߴ', 'ߵ'),
+    ('ߺ', 'ߺ'),
+    ('ࠀ', 'ࠕ'),
+    ('ࠚ', 'ࠚ'),
+    ('ࠤ', 'ࠤ'),
+    ('ࠨ', 'ࠨ'),
+    ('ࡀ', 'ࡘ'),
+    ('ࡠ', 'ࡪ'),
+    ('ࢠ', 'ࢴ'),
+    ('ࢶ', '\u{8c7}'),
+    ('ऄ', 'ह'),
+    ('ऽ', 'ऽ'),
+    ('ॐ', 'ॐ'),
+    ('क़', 'ॡ'),
+    ('ॱ', 'ঀ'),
+    ('অ', 'ঌ'),
+    ('এ', 'ঐ'),
+    ('ও', 'ন'),
+    ('প', 'র'),
+    ('ল', 'ল'),
+    ('শ', 'হ'),
+    ('ঽ', 'ঽ'),
+    ('ৎ', 'ৎ'),
+    ('ড়', 'ঢ়'),
+    ('য়', 'ৡ'),
+    ('ৰ', 'ৱ'),
+    ('ৼ', 'ৼ'),
+    ('ਅ', 'ਊ'),
+    ('ਏ', 'ਐ'),
+    ('ਓ', 'ਨ'),
+    ('ਪ', 'ਰ'),
+    ('ਲ', 'ਲ਼'),
+    ('ਵ', 'ਸ਼'),
+    ('ਸ', 'ਹ'),
+    ('ਖ਼', 'ੜ'),
+    ('ਫ਼', 'ਫ਼'),
+    ('ੲ', 'ੴ'),
+    ('અ', 'ઍ'),
+    ('એ', 'ઑ'),
+    ('ઓ', 'ન'),
+    ('પ', 'ર'),
+    ('લ', 'ળ'),
+    ('વ', 'હ'),
+    ('ઽ', 'ઽ'),
+    ('ૐ', 'ૐ'),
+    ('ૠ', 'ૡ'),
+    ('ૹ', 'ૹ'),
+    ('ଅ', 'ଌ'),
+    ('ଏ', 'ଐ'),
+    ('ଓ', 'ନ'),
+    ('ପ', 'ର'),
+    ('ଲ', 'ଳ'),
+    ('ଵ', 'ହ'),
+    ('ଽ', 'ଽ'),
+    ('ଡ଼', 'ଢ଼'),
+    ('ୟ', 'ୡ'),
+    ('ୱ', 'ୱ'),
+    ('ஃ', 'ஃ'),
+    ('அ', 'ஊ'),
+    ('எ', 'ஐ'),
+    ('ஒ', 'க'),
+    ('ங', 'ச'),
+    ('ஜ', 'ஜ'),
+    ('ஞ', 'ட'),
+    ('ண', 'த'),
+    ('ந', 'ப'),
+    ('ம', 'ஹ'),
+    ('ௐ', 'ௐ'),
+    ('అ', 'ఌ'),
+    ('ఎ', 'ఐ'),
+    ('ఒ', 'న'),
+    ('ప', 'హ'),
+    ('ఽ', 'ఽ'),
+    ('ౘ', 'ౚ'),
+    ('ౠ', 'ౡ'),
+    ('ಀ', 'ಀ'),
+    ('ಅ', 'ಌ'),
+    ('ಎ', 'ಐ'),
+    ('ಒ', 'ನ'),
+    ('ಪ', 'ಳ'),
+    ('ವ', 'ಹ'),
+    ('ಽ', 'ಽ'),
+    ('ೞ', 'ೞ'),
+    ('ೠ', 'ೡ'),
+    ('ೱ', 'ೲ'),
+    ('\u{d04}', 'ഌ'),
+    ('എ', 'ഐ'),
+    ('ഒ', 'ഺ'),
+    ('ഽ', 'ഽ'),
+    ('ൎ', 'ൎ'),
+    ('ൔ', 'ൖ'),
+    ('ൟ', 'ൡ'),
+    ('ൺ', 'ൿ'),
+    ('අ', 'ඖ'),
+    ('ක', 'න'),
+    ('ඳ', 'ර'),
+    ('ල', 'ල'),
+    ('ව', 'ෆ'),
+    ('ༀ', 'ༀ'),
+    ('ཀ', 'ཇ'),
+    ('ཉ', 'ཬ'),
+    ('ྈ', 'ྌ'),
+    ('Ⴀ', 'Ⴥ'),
+    ('Ⴧ', 'Ⴧ'),
+    ('Ⴭ', 'Ⴭ'),
+    ('ა', 'ჺ'),
+    ('ჼ', 'ቈ'),
+    ('ቊ', 'ቍ'),
+    ('ቐ', 'ቖ'),
+    ('ቘ', 'ቘ'),
+    ('ቚ', 'ቝ'),
+    ('በ', 'ኈ'),
+    ('ኊ', 'ኍ'),
+    ('ነ', 'ኰ'),
+    ('ኲ', 'ኵ'),
+    ('ኸ', 'ኾ'),
+    ('ዀ', 'ዀ'),
+    ('ዂ', 'ዅ'),
+    ('ወ', 'ዖ'),
+    ('ዘ', 'ጐ'),
+    ('ጒ', 'ጕ'),
+    ('ጘ', 'ፚ'),
+    ('ᎀ', 'ᎏ'),
+    ('Ꭰ', 'Ᏽ'),
+    ('ᏸ', 'ᏽ'),
+    ('ᐁ', 'ᙬ'),
+    ('ᙯ', 'ᙿ'),
+    ('ᚁ', 'ᚚ'),
+    ('ᚠ', 'ᛪ'),
+    ('ᛮ', 'ᛸ'),
+    ('ᜀ', 'ᜌ'),
+    ('ᜎ', 'ᜑ'),
+    ('ᜠ', 'ᜱ'),
+    ('ᝀ', 'ᝑ'),
+    ('ᝠ', 'ᝬ'),
+    ('ᝮ', 'ᝰ'),
+    ('ᠠ', 'ᡸ'),
+    ('ᢀ', 'ᢄ'),
+    ('ᢇ', 'ᢨ'),
+    ('ᢪ', 'ᢪ'),
+    ('ᢰ', 'ᣵ'),
+    ('ᤀ', 'ᤞ'),
+    ('ᨀ', 'ᨖ'),
+    ('ᬅ', 'ᬳ'),
+    ('ᭅ', 'ᭋ'),
+    ('ᮃ', 'ᮠ'),
+    ('ᮮ', 'ᮯ'),
+    ('ᮺ', 'ᯥ'),
+    ('ᰀ', 'ᰣ'),
+    ('ᱍ', 'ᱏ'),
+    ('ᱚ', 'ᱽ'),
+    ('ᲀ', 'ᲈ'),
+    ('Ა', 'Ჺ'),
+    ('Ჽ', 'Ჿ'),
+    ('ᳩ', 'ᳬ'),
+    ('ᳮ', 'ᳳ'),
+    ('ᳵ', 'ᳶ'),
+    ('ᳺ', 'ᳺ'),
+    ('ᴀ', 'ᶿ'),
+    ('Ḁ', 'ἕ'),
+    ('Ἐ', 'Ἕ'),
+    ('ἠ', 'ὅ'),
+    ('Ὀ', 'Ὅ'),
+    ('ὐ', 'ὗ'),
+    ('Ὑ', 'Ὑ'),
+    ('Ὓ', 'Ὓ'),
+    ('Ὕ', 'Ὕ'),
+    ('Ὗ', 'ώ'),
+    ('ᾀ', 'ᾴ'),
+    ('ᾶ', 'ᾼ'),
+    ('ι', 'ι'),
+    ('ῂ', 'ῄ'),
+    ('ῆ', 'ῌ'),
+    ('ῐ', 'ΐ'),
+    ('ῖ', 'Ί'),
+    ('ῠ', 'Ῥ'),
+    ('ῲ', 'ῴ'),
+    ('ῶ', 'ῼ'),
+    ('ⁱ', 'ⁱ'),
+    ('ⁿ', 'ⁿ'),
+    ('ₐ', 'ₜ'),
+    ('ℂ', 'ℂ'),
+    ('ℇ', 'ℇ'),
+    ('ℊ', 'ℓ'),
+    ('ℕ', 'ℕ'),
+    ('ℙ', 'ℝ'),
+    ('ℤ', 'ℤ'),
+    ('Ω', 'Ω'),
+    ('ℨ', 'ℨ'),
+    ('K', 'ℭ'),
+    ('ℯ', 'ℹ'),
+    ('ℼ', 'ℿ'),
+    ('ⅅ', 'ⅉ'),
+    ('ⅎ', 'ⅎ'),
+    ('Ⅰ', 'ↈ'),
+    ('Ⓐ', 'ⓩ'),
+    ('Ⰰ', 'Ⱞ'),
+    ('ⰰ', 'ⱞ'),
+    ('Ⱡ', 'ⳤ'),
+    ('Ⳬ', 'ⳮ'),
+    ('Ⳳ', 'ⳳ'),
+    ('ⴀ', 'ⴥ'),
+    ('ⴧ', 'ⴧ'),
+    ('ⴭ', 'ⴭ'),
+    ('ⴰ', 'ⵧ'),
+    ('ⵯ', 'ⵯ'),
+    ('ⶀ', 'ⶖ'),
+    ('ⶠ', 'ⶦ'),
+    ('ⶨ', 'ⶮ'),
+    ('ⶰ', 'ⶶ'),
+    ('ⶸ', 'ⶾ'),
+    ('ⷀ', 'ⷆ'),
+    ('ⷈ', 'ⷎ'),
+    ('ⷐ', 'ⷖ'),
+    ('ⷘ', 'ⷞ'),
+    ('ⸯ', 'ⸯ'),
+    ('々', '々'),
+    ('〻', '〼'),
+    ('ㄅ', 'ㄯ'),
+    ('ㄱ', 'ㆎ'),
+    ('ㆠ', '\u{31bf}'),
+    ('ꀀ', 'ꒌ'),
+    ('ꓐ', 'ꓽ'),
+    ('ꔀ', 'ꘌ'),
+    ('ꘐ', 'ꘟ'),
+    ('ꘪ', 'ꘫ'),
+    ('Ꙁ', 'ꙮ'),
+    ('ꙿ', 'ꚝ'),
+    ('ꚠ', 'ꛯ'),
+    ('꜈', 'ꞿ'),
+    ('Ꟃ', '\u{a7ca}'),
+    ('\u{a7f5}', 'ꠁ'),
+    ('ꠃ', 'ꠅ'),
+    ('ꠇ', 'ꠊ'),
+    ('ꠌ', 'ꠢ'),
+    ('ꡀ', 'ꡳ'),
+    ('ꢂ', 'ꢳ'),
+    ('ꣲ', 'ꣷ'),
+    ('ꣻ', 'ꣻ'),
+    ('ꣽ', 'ꣾ'),
+    ('ꤊ', 'ꤥ'),
+    ('ꤰ', 'ꥆ'),
+    ('ꥠ', 'ꥼ'),
+    ('ꦄ', 'ꦲ'),
+    ('ꧏ', 'ꧏ'),
+    ('ꨀ', 'ꨨ'),
+    ('ꩀ', 'ꩂ'),
+    ('ꩄ', 'ꩋ'),
+    ('ꫠ', 'ꫪ'),
+    ('ꫲ', 'ꫴ'),
+    ('ꬁ', 'ꬆ'),
+    ('ꬉ', 'ꬎ'),
+    ('ꬑ', 'ꬖ'),
+    ('ꬠ', 'ꬦ'),
+    ('ꬨ', 'ꬮ'),
+    ('ꬰ', '\u{ab69}'),
+    ('ꭰ', 'ꯢ'),
+    ('가', '힣'),
+    ('ힰ', 'ퟆ'),
+    ('ퟋ', 'ퟻ'),
+    ('ff', 'st'),
+    ('ﬓ', 'ﬗ'),
+    ('ﭐ', 'ﮱ'),
+    ('ﯓ', 'ﴽ'),
+    ('ﵐ', 'ﶏ'),
+    ('ﶒ', 'ﷇ'),
+    ('ﷰ', 'ﷻ'),
+    ('ﹰ', 'ﹴ'),
+    ('ﹶ', 'ﻼ'),
+    ('A', 'Z'),
+    ('a', 'z'),
+    ('ᅠ', 'ᄒ'),
+    ('ᅡ', 'ᅦ'),
+    ('ᅧ', 'ᅬ'),
+    ('ᅭ', 'ᅲ'),
+    ('ᅳ', 'ᅵ'),
+    ('𐀀', '𐀋'),
+    ('𐀍', '𐀦'),
+    ('𐀨', '𐀺'),
+    ('𐀼', '𐀽'),
+    ('𐀿', '𐁍'),
+    ('𐁐', '𐁝'),
+    ('𐂀', '𐃺'),
+    ('𐅀', '𐅴'),
+    ('𐊀', '𐊜'),
+    ('𐊠', '𐋐'),
+    ('𐌀', '𐌟'),
+    ('𐌭', '𐍊'),
+    ('𐍐', '𐍵'),
+    ('𐎀', '𐎝'),
+    ('𐎠', '𐏃'),
+    ('𐏈', '𐏏'),
+    ('𐏑', '𐏕'),
+    ('𐐀', '𐒝'),
+    ('𐒰', '𐓓'),
+    ('𐓘', '𐓻'),
+    ('𐔀', '𐔧'),
+    ('𐔰', '𐕣'),
+    ('𐘀', '𐜶'),
+    ('𐝀', '𐝕'),
+    ('𐝠', '𐝧'),
+    ('𐠀', '𐠅'),
+    ('𐠈', '𐠈'),
+    ('𐠊', '𐠵'),
+    ('𐠷', '𐠸'),
+    ('𐠼', '𐠼'),
+    ('𐠿', '𐡕'),
+    ('𐡠', '𐡶'),
+    ('𐢀', '𐢞'),
+    ('𐣠', '𐣲'),
+    ('𐣴', '𐣵'),
+    ('𐤀', '𐤕'),
+    ('𐤠', '𐤹'),
+    ('𐦀', '𐦷'),
+    ('𐦾', '𐦿'),
+    ('𐨀', '𐨀'),
+    ('𐨐', '𐨓'),
+    ('𐨕', '𐨗'),
+    ('𐨙', '𐨵'),
+    ('𐩠', '𐩼'),
+    ('𐪀', '𐪜'),
+    ('𐫀', '𐫇'),
+    ('𐫉', '𐫤'),
+    ('𐬀', '𐬵'),
+    ('𐭀', '𐭕'),
+    ('𐭠', '𐭲'),
+    ('𐮀', '𐮑'),
+    ('𐰀', '𐱈'),
+    ('𐲀', '𐲲'),
+    ('𐳀', '𐳲'),
+    ('𐴀', '𐴣'),
+    ('\u{10e80}', '\u{10ea9}'),
+    ('\u{10eb0}', '\u{10eb1}'),
+    ('𐼀', '𐼜'),
+    ('𐼧', '𐼧'),
+    ('𐼰', '𐽅'),
+    ('\u{10fb0}', '\u{10fc4}'),
+    ('𐿠', '𐿶'),
+    ('𑀃', '𑀷'),
+    ('𑂃', '𑂯'),
+    ('𑃐', '𑃨'),
+    ('𑄃', '𑄦'),
+    ('𑅄', '𑅄'),
+    ('\u{11147}', '\u{11147}'),
+    ('𑅐', '𑅲'),
+    ('𑅶', '𑅶'),
+    ('𑆃', '𑆲'),
+    ('𑇁', '𑇄'),
+    ('𑇚', '𑇚'),
+    ('𑇜', '𑇜'),
+    ('𑈀', '𑈑'),
+    ('𑈓', '𑈫'),
+    ('𑊀', '𑊆'),
+    ('𑊈', '𑊈'),
+    ('𑊊', '𑊍'),
+    ('𑊏', '𑊝'),
+    ('𑊟', '𑊨'),
+    ('𑊰', '𑋞'),
+    ('𑌅', '𑌌'),
+    ('𑌏', '𑌐'),
+    ('𑌓', '𑌨'),
+    ('𑌪', '𑌰'),
+    ('𑌲', '𑌳'),
+    ('𑌵', '𑌹'),
+    ('𑌽', '𑌽'),
+    ('𑍐', '𑍐'),
+    ('𑍝', '𑍡'),
+    ('𑐀', '𑐴'),
+    ('𑑇', '𑑊'),
+    ('𑑟', '\u{11461}'),
+    ('𑒀', '𑒯'),
+    ('𑓄', '𑓅'),
+    ('𑓇', '𑓇'),
+    ('𑖀', '𑖮'),
+    ('𑗘', '𑗛'),
+    ('𑘀', '𑘯'),
+    ('𑙄', '𑙄'),
+    ('𑚀', '𑚪'),
+    ('𑚸', '𑚸'),
+    ('𑠀', '𑠫'),
+    ('𑢠', '𑣟'),
+    ('𑣿', '\u{11906}'),
+    ('\u{11909}', '\u{11909}'),
+    ('\u{1190c}', '\u{11913}'),
+    ('\u{11915}', '\u{11916}'),
+    ('\u{11918}', '\u{1192f}'),
+    ('\u{1193f}', '\u{1193f}'),
+    ('\u{11941}', '\u{11941}'),
+    ('𑦠', '𑦧'),
+    ('𑦪', '𑧐'),
+    ('𑧡', '𑧡'),
+    ('𑧣', '𑧣'),
+    ('𑨀', '𑨀'),
+    ('𑨋', '𑨲'),
+    ('𑨺', '𑨺'),
+    ('𑩐', '𑩐'),
+    ('𑩜', '𑪉'),
+    ('𑪝', '𑪝'),
+    ('𑫀', '𑫸'),
+    ('𑰀', '𑰈'),
+    ('𑰊', '𑰮'),
+    ('𑱀', '𑱀'),
+    ('𑱲', '𑲏'),
+    ('𑴀', '𑴆'),
+    ('𑴈', '𑴉'),
+    ('𑴋', '𑴰'),
+    ('𑵆', '𑵆'),
+    ('𑵠', '𑵥'),
+    ('𑵧', '𑵨'),
+    ('𑵪', '𑶉'),
+    ('𑶘', '𑶘'),
+    ('𑻠', '𑻲'),
+    ('\u{11fb0}', '\u{11fb0}'),
+    ('𒀀', '𒎙'),
+    ('𒐀', '𒑮'),
+    ('𒒀', '𒕃'),
+    ('𓀀', '𓐮'),
+    ('𔐀', '𔙆'),
+    ('𖠀', '𖨸'),
+    ('𖩀', '𖩞'),
+    ('𖫐', '𖫭'),
+    ('𖬀', '𖬯'),
+    ('𖭀', '𖭃'),
+    ('𖭣', '𖭷'),
+    ('𖭽', '𖮏'),
+    ('𖹀', '𖹿'),
+    ('𖼀', '𖽊'),
+    ('𖽐', '𖽐'),
+    ('𖾓', '𖾟'),
+    ('𖿠', '𖿡'),
+    ('𖿣', '𖿣'),
+    ('𛰀', '𛱪'),
+    ('𛱰', '𛱼'),
+    ('𛲀', '𛲈'),
+    ('𛲐', '𛲙'),
+    ('𝐀', '𝑔'),
+    ('𝑖', '𝒜'),
+    ('𝒞', '𝒟'),
+    ('𝒢', '𝒢'),
+    ('𝒥', '𝒦'),
+    ('𝒩', '𝒬'),
+    ('𝒮', '𝒹'),
+    ('𝒻', '𝒻'),
+    ('𝒽', '𝓃'),
+    ('𝓅', '𝔅'),
+    ('𝔇', '𝔊'),
+    ('𝔍', '𝔔'),
+    ('𝔖', '𝔜'),
+    ('𝔞', '𝔹'),
+    ('𝔻', '𝔾'),
+    ('𝕀', '𝕄'),
+    ('𝕆', '𝕆'),
+    ('𝕊', '𝕐'),
+    ('𝕒', '𝚥'),
+    ('𝚨', '𝛀'),
+    ('𝛂', '𝛚'),
+    ('𝛜', '𝛺'),
+    ('𝛼', '𝜔'),
+    ('𝜖', '𝜴'),
+    ('𝜶', '𝝎'),
+    ('𝝐', '𝝮'),
+    ('𝝰', '𝞈'),
+    ('𝞊', '𝞨'),
+    ('𝞪', '𝟂'),
+    ('𝟄', '𝟋'),
+    ('𞄀', '𞄬'),
+    ('𞄷', '𞄽'),
+    ('𞅎', '𞅎'),
+    ('𞋀', '𞋫'),
+    ('𞠀', '𞣄'),
+    ('𞤀', '𞥃'),
+    ('𞥋', '𞥋'),
+    ('𞸀', '𞸃'),
+    ('𞸅', '𞸟'),
+    ('𞸡', '𞸢'),
+    ('𞸤', '𞸤'),
+    ('𞸧', '𞸧'),
+    ('𞸩', '𞸲'),
+    ('𞸴', '𞸷'),
+    ('𞸹', '𞸹'),
+    ('𞸻', '𞸻'),
+    ('𞹂', '𞹂'),
+    ('𞹇', '𞹇'),
+    ('𞹉', '𞹉'),
+    ('𞹋', '𞹋'),
+    ('𞹍', '𞹏'),
+    ('𞹑', '𞹒'),
+    ('𞹔', '𞹔'),
+    ('𞹗', '𞹗'),
+    ('𞹙', '𞹙'),
+    ('𞹛', '𞹛'),
+    ('𞹝', '𞹝'),
+    ('𞹟', '𞹟'),
+    ('𞹡', '𞹢'),
+    ('𞹤', '𞹤'),
+    ('𞹧', '𞹪'),
+    ('𞹬', '𞹲'),
+    ('𞹴', '𞹷'),
+    ('𞹹', '𞹼'),
+    ('𞹾', '𞹾'),
+    ('𞺀', '𞺉'),
+    ('𞺋', '𞺛'),
+    ('𞺡', '𞺣'),
+    ('𞺥', '𞺩'),
+    ('𞺫', '𞺻'),
+    ('🄰', '🅉'),
+    ('🅐', '🅩'),
+    ('🅰', '🆉'),
+];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('\"', '\"')];
+
+pub const EXTEND: &'static [(char, char)] = &[
+    ('\u{300}', '\u{36f}'),
+    ('\u{483}', '\u{489}'),
+    ('\u{591}', '\u{5bd}'),
+    ('\u{5bf}', '\u{5bf}'),
+    ('\u{5c1}', '\u{5c2}'),
+    ('\u{5c4}', '\u{5c5}'),
+    ('\u{5c7}', '\u{5c7}'),
+    ('\u{610}', '\u{61a}'),
+    ('\u{64b}', '\u{65f}'),
+    ('\u{670}', '\u{670}'),
+    ('\u{6d6}', '\u{6dc}'),
+    ('\u{6df}', '\u{6e4}'),
+    ('\u{6e7}', '\u{6e8}'),
+    ('\u{6ea}', '\u{6ed}'),
+    ('\u{711}', '\u{711}'),
+    ('\u{730}', '\u{74a}'),
+    ('\u{7a6}', '\u{7b0}'),
+    ('\u{7eb}', '\u{7f3}'),
+    ('\u{7fd}', '\u{7fd}'),
+    ('\u{816}', '\u{819}'),
+    ('\u{81b}', '\u{823}'),
+    ('\u{825}', '\u{827}'),
+    ('\u{829}', '\u{82d}'),
+    ('\u{859}', '\u{85b}'),
+    ('\u{8d3}', '\u{8e1}'),
+    ('\u{8e3}', 'ः'),
+    ('\u{93a}', '\u{93c}'),
+    ('ा', 'ॏ'),
+    ('\u{951}', '\u{957}'),
+    ('\u{962}', '\u{963}'),
+    ('\u{981}', 'ঃ'),
+    ('\u{9bc}', '\u{9bc}'),
+    ('\u{9be}', '\u{9c4}'),
+    ('ে', 'ৈ'),
+    ('ো', '\u{9cd}'),
+    ('\u{9d7}', '\u{9d7}'),
+    ('\u{9e2}', '\u{9e3}'),
+    ('\u{9fe}', '\u{9fe}'),
+    ('\u{a01}', 'ਃ'),
+    ('\u{a3c}', '\u{a3c}'),
+    ('ਾ', '\u{a42}'),
+    ('\u{a47}', '\u{a48}'),
+    ('\u{a4b}', '\u{a4d}'),
+    ('\u{a51}', '\u{a51}'),
+    ('\u{a70}', '\u{a71}'),
+    ('\u{a75}', '\u{a75}'),
+    ('\u{a81}', 'ઃ'),
+    ('\u{abc}', '\u{abc}'),
+    ('ા', '\u{ac5}'),
+    ('\u{ac7}', 'ૉ'),
+    ('ો', '\u{acd}'),
+    ('\u{ae2}', '\u{ae3}'),
+    ('\u{afa}', '\u{aff}'),
+    ('\u{b01}', 'ଃ'),
+    ('\u{b3c}', '\u{b3c}'),
+    ('\u{b3e}', '\u{b44}'),
+    ('େ', 'ୈ'),
+    ('ୋ', '\u{b4d}'),
+    ('\u{b55}', '\u{b57}'),
+    ('\u{b62}', '\u{b63}'),
+    ('\u{b82}', '\u{b82}'),
+    ('\u{bbe}', 'ூ'),
+    ('ெ', 'ை'),
+    ('ொ', '\u{bcd}'),
+    ('\u{bd7}', '\u{bd7}'),
+    ('\u{c00}', '\u{c04}'),
+    ('\u{c3e}', 'ౄ'),
+    ('\u{c46}', '\u{c48}'),
+    ('\u{c4a}', '\u{c4d}'),
+    ('\u{c55}', '\u{c56}'),
+    ('\u{c62}', '\u{c63}'),
+    ('\u{c81}', 'ಃ'),
+    ('\u{cbc}', '\u{cbc}'),
+    ('ಾ', 'ೄ'),
+    ('\u{cc6}', 'ೈ'),
+    ('ೊ', '\u{ccd}'),
+    ('\u{cd5}', '\u{cd6}'),
+    ('\u{ce2}', '\u{ce3}'),
+    ('\u{d00}', 'ഃ'),
+    ('\u{d3b}', '\u{d3c}'),
+    ('\u{d3e}', '\u{d44}'),
+    ('െ', 'ൈ'),
+    ('ൊ', '\u{d4d}'),
+    ('\u{d57}', '\u{d57}'),
+    ('\u{d62}', '\u{d63}'),
+    ('\u{d81}', 'ඃ'),
+    ('\u{dca}', '\u{dca}'),
+    ('\u{dcf}', '\u{dd4}'),
+    ('\u{dd6}', '\u{dd6}'),
+    ('ෘ', '\u{ddf}'),
+    ('ෲ', 'ෳ'),
+    ('\u{e31}', '\u{e31}'),
+    ('\u{e34}', '\u{e3a}'),
+    ('\u{e47}', '\u{e4e}'),
+    ('\u{eb1}', '\u{eb1}'),
+    ('\u{eb4}', '\u{ebc}'),
+    ('\u{ec8}', '\u{ecd}'),
+    ('\u{f18}', '\u{f19}'),
+    ('\u{f35}', '\u{f35}'),
+    ('\u{f37}', '\u{f37}'),
+    ('\u{f39}', '\u{f39}'),
+    ('༾', '༿'),
+    ('\u{f71}', '\u{f84}'),
+    ('\u{f86}', '\u{f87}'),
+    ('\u{f8d}', '\u{f97}'),
+    ('\u{f99}', '\u{fbc}'),
+    ('\u{fc6}', '\u{fc6}'),
+    ('ါ', '\u{103e}'),
+    ('ၖ', '\u{1059}'),
+    ('\u{105e}', '\u{1060}'),
+    ('ၢ', 'ၤ'),
+    ('ၧ', 'ၭ'),
+    ('\u{1071}', '\u{1074}'),
+    ('\u{1082}', '\u{108d}'),
+    ('ႏ', 'ႏ'),
+    ('ႚ', '\u{109d}'),
+    ('\u{135d}', '\u{135f}'),
+    ('\u{1712}', '\u{1714}'),
+    ('\u{1732}', '\u{1734}'),
+    ('\u{1752}', '\u{1753}'),
+    ('\u{1772}', '\u{1773}'),
+    ('\u{17b4}', '\u{17d3}'),
+    ('\u{17dd}', '\u{17dd}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{1885}', '\u{1886}'),
+    ('\u{18a9}', '\u{18a9}'),
+    ('\u{1920}', 'ᤫ'),
+    ('ᤰ', '\u{193b}'),
+    ('\u{1a17}', '\u{1a1b}'),
+    ('ᩕ', '\u{1a5e}'),
+    ('\u{1a60}', '\u{1a7c}'),
+    ('\u{1a7f}', '\u{1a7f}'),
+    ('\u{1ab0}', '\u{1ac0}'),
+    ('\u{1b00}', 'ᬄ'),
+    ('\u{1b34}', '᭄'),
+    ('\u{1b6b}', '\u{1b73}'),
+    ('\u{1b80}', 'ᮂ'),
+    ('ᮡ', '\u{1bad}'),
+    ('\u{1be6}', '᯳'),
+    ('ᰤ', '\u{1c37}'),
+    ('\u{1cd0}', '\u{1cd2}'),
+    ('\u{1cd4}', '\u{1ce8}'),
+    ('\u{1ced}', '\u{1ced}'),
+    ('\u{1cf4}', '\u{1cf4}'),
+    ('᳷', '\u{1cf9}'),
+    ('\u{1dc0}', '\u{1df9}'),
+    ('\u{1dfb}', '\u{1dff}'),
+    ('\u{200c}', '\u{200c}'),
+    ('\u{20d0}', '\u{20f0}'),
+    ('\u{2cef}', '\u{2cf1}'),
+    ('\u{2d7f}', '\u{2d7f}'),
+    ('\u{2de0}', '\u{2dff}'),
+    ('\u{302a}', '\u{302f}'),
+    ('\u{3099}', '\u{309a}'),
+    ('\u{a66f}', '\u{a672}'),
+    ('\u{a674}', '\u{a67d}'),
+    ('\u{a69e}', '\u{a69f}'),
+    ('\u{a6f0}', '\u{a6f1}'),
+    ('\u{a802}', '\u{a802}'),
+    ('\u{a806}', '\u{a806}'),
+    ('\u{a80b}', '\u{a80b}'),
+    ('ꠣ', 'ꠧ'),
+    ('\u{a82c}', '\u{a82c}'),
+    ('ꢀ', 'ꢁ'),
+    ('ꢴ', '\u{a8c5}'),
+    ('\u{a8e0}', '\u{a8f1}'),
+    ('\u{a8ff}', '\u{a8ff}'),
+    ('\u{a926}', '\u{a92d}'),
+    ('\u{a947}', '꥓'),
+    ('\u{a980}', 'ꦃ'),
+    ('\u{a9b3}', '꧀'),
+    ('\u{a9e5}', '\u{a9e5}'),
+    ('\u{aa29}', '\u{aa36}'),
+    ('\u{aa43}', '\u{aa43}'),
+    ('\u{aa4c}', 'ꩍ'),
+    ('ꩻ', 'ꩽ'),
+    ('\u{aab0}', '\u{aab0}'),
+    ('\u{aab2}', '\u{aab4}'),
+    ('\u{aab7}', '\u{aab8}'),
+    ('\u{aabe}', '\u{aabf}'),
+    ('\u{aac1}', '\u{aac1}'),
+    ('ꫫ', 'ꫯ'),
+    ('ꫵ', '\u{aaf6}'),
+    ('ꯣ', 'ꯪ'),
+    ('꯬', '\u{abed}'),
+    ('\u{fb1e}', '\u{fb1e}'),
+    ('\u{fe00}', '\u{fe0f}'),
+    ('\u{fe20}', '\u{fe2f}'),
+    ('\u{ff9e}', '\u{ff9f}'),
+    ('\u{101fd}', '\u{101fd}'),
+    ('\u{102e0}', '\u{102e0}'),
+    ('\u{10376}', '\u{1037a}'),
+    ('\u{10a01}', '\u{10a03}'),
+    ('\u{10a05}', '\u{10a06}'),
+    ('\u{10a0c}', '\u{10a0f}'),
+    ('\u{10a38}', '\u{10a3a}'),
+    ('\u{10a3f}', '\u{10a3f}'),
+    ('\u{10ae5}', '\u{10ae6}'),
+    ('\u{10d24}', '\u{10d27}'),
+    ('\u{10eab}', '\u{10eac}'),
+    ('\u{10f46}', '\u{10f50}'),
+    ('𑀀', '𑀂'),
+    ('\u{11038}', '\u{11046}'),
+    ('\u{1107f}', '𑂂'),
+    ('𑂰', '\u{110ba}'),
+    ('\u{11100}', '\u{11102}'),
+    ('\u{11127}', '\u{11134}'),
+    ('𑅅', '𑅆'),
+    ('\u{11173}', '\u{11173}'),
+    ('\u{11180}', '𑆂'),
+    ('𑆳', '𑇀'),
+    ('\u{111c9}', '\u{111cc}'),
+    ('\u{111ce}', '\u{111cf}'),
+    ('𑈬', '\u{11237}'),
+    ('\u{1123e}', '\u{1123e}'),
+    ('\u{112df}', '\u{112ea}'),
+    ('\u{11300}', '𑌃'),
+    ('\u{1133b}', '\u{1133c}'),
+    ('\u{1133e}', '𑍄'),
+    ('𑍇', '𑍈'),
+    ('𑍋', '𑍍'),
+    ('\u{11357}', '\u{11357}'),
+    ('𑍢', '𑍣'),
+    ('\u{11366}', '\u{1136c}'),
+    ('\u{11370}', '\u{11374}'),
+    ('𑐵', '\u{11446}'),
+    ('\u{1145e}', '\u{1145e}'),
+    ('\u{114b0}', '\u{114c3}'),
+    ('\u{115af}', '\u{115b5}'),
+    ('𑖸', '\u{115c0}'),
+    ('\u{115dc}', '\u{115dd}'),
+    ('𑘰', '\u{11640}'),
+    ('\u{116ab}', '\u{116b7}'),
+    ('\u{1171d}', '\u{1172b}'),
+    ('𑠬', '\u{1183a}'),
+    ('\u{11930}', '\u{11935}'),
+    ('\u{11937}', '\u{11938}'),
+    ('\u{1193b}', '\u{1193e}'),
+    ('\u{11940}', '\u{11940}'),
+    ('\u{11942}', '\u{11943}'),
+    ('𑧑', '\u{119d7}'),
+    ('\u{119da}', '\u{119e0}'),
+    ('𑧤', '𑧤'),
+    ('\u{11a01}', '\u{11a0a}'),
+    ('\u{11a33}', '𑨹'),
+    ('\u{11a3b}', '\u{11a3e}'),
+    ('\u{11a47}', '\u{11a47}'),
+    ('\u{11a51}', '\u{11a5b}'),
+    ('\u{11a8a}', '\u{11a99}'),
+    ('𑰯', '\u{11c36}'),
+    ('\u{11c38}', '\u{11c3f}'),
+    ('\u{11c92}', '\u{11ca7}'),
+    ('𑲩', '\u{11cb6}'),
+    ('\u{11d31}', '\u{11d36}'),
+    ('\u{11d3a}', '\u{11d3a}'),
+    ('\u{11d3c}', '\u{11d3d}'),
+    ('\u{11d3f}', '\u{11d45}'),
+    ('\u{11d47}', '\u{11d47}'),
+    ('𑶊', '𑶎'),
+    ('\u{11d90}', '\u{11d91}'),
+    ('𑶓', '\u{11d97}'),
+    ('\u{11ef3}', '𑻶'),
+    ('\u{16af0}', '\u{16af4}'),
+    ('\u{16b30}', '\u{16b36}'),
+    ('\u{16f4f}', '\u{16f4f}'),
+    ('𖽑', '𖾇'),
+    ('\u{16f8f}', '\u{16f92}'),
+    ('\u{16fe4}', '\u{16fe4}'),
+    ('\u{16ff0}', '\u{16ff1}'),
+    ('\u{1bc9d}', '\u{1bc9e}'),
+    ('\u{1d165}', '\u{1d169}'),
+    ('𝅭', '\u{1d172}'),
+    ('\u{1d17b}', '\u{1d182}'),
+    ('\u{1d185}', '\u{1d18b}'),
+    ('\u{1d1aa}', '\u{1d1ad}'),
+    ('\u{1d242}', '\u{1d244}'),
+    ('\u{1da00}', '\u{1da36}'),
+    ('\u{1da3b}', '\u{1da6c}'),
+    ('\u{1da75}', '\u{1da75}'),
+    ('\u{1da84}', '\u{1da84}'),
+    ('\u{1da9b}', '\u{1da9f}'),
+    ('\u{1daa1}', '\u{1daaf}'),
+    ('\u{1e000}', '\u{1e006}'),
+    ('\u{1e008}', '\u{1e018}'),
+    ('\u{1e01b}', '\u{1e021}'),
+    ('\u{1e023}', '\u{1e024}'),
+    ('\u{1e026}', '\u{1e02a}'),
+    ('\u{1e130}', '\u{1e136}'),
+    ('\u{1e2ec}', '\u{1e2ef}'),
+    ('\u{1e8d0}', '\u{1e8d6}'),
+    ('\u{1e944}', '\u{1e94a}'),
+    ('🏻', '🏿'),
+    ('\u{e0020}', '\u{e007f}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const EXTENDNUMLET: &'static [(char, char)] = &[
+    ('_', '_'),
+    ('\u{202f}', '\u{202f}'),
+    ('‿', '⁀'),
+    ('⁔', '⁔'),
+    ('︳', '︴'),
+    ('﹍', '﹏'),
+    ('_', '_'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+    ('\u{ad}', '\u{ad}'),
+    ('\u{600}', '\u{605}'),
+    ('\u{61c}', '\u{61c}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{180e}', '\u{180e}'),
+    ('\u{200e}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{2064}'),
+    ('\u{2066}', '\u{206f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{fff9}', '\u{fffb}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('\u{13430}', '\u{13438}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const HEBREW_LETTER: &'static [(char, char)] = &[
+    ('א', 'ת'),
+    ('ׯ', 'ײ'),
+    ('יִ', 'יִ'),
+    ('ײַ', 'ﬨ'),
+    ('שׁ', 'זּ'),
+    ('טּ', 'לּ'),
+    ('מּ', 'מּ'),
+    ('נּ', 'סּ'),
+    ('ףּ', 'פּ'),
+    ('צּ', 'ﭏ'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+    ('〱', '〵'),
+    ('゛', '゜'),
+    ('゠', 'ヺ'),
+    ('ー', 'ヿ'),
+    ('ㇰ', 'ㇿ'),
+    ('㋐', '㋾'),
+    ('㌀', '㍗'),
+    ('ヲ', 'ン'),
+    ('𛀀', '𛀀'),
+    ('𛅤', '𛅧'),
+];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const MIDLETTER: &'static [(char, char)] = &[
+    (':', ':'),
+    ('·', '·'),
+    ('·', '·'),
+    ('՟', '՟'),
+    ('״', '״'),
+    ('‧', '‧'),
+    ('︓', '︓'),
+    ('﹕', '﹕'),
+    (':', ':'),
+];
+
+pub const MIDNUM: &'static [(char, char)] = &[
+    (',', ','),
+    (';', ';'),
+    (';', ';'),
+    ('։', '։'),
+    ('،', '؍'),
+    ('٬', '٬'),
+    ('߸', '߸'),
+    ('⁄', '⁄'),
+    ('︐', '︐'),
+    ('︔', '︔'),
+    ('﹐', '﹐'),
+    ('﹔', '﹔'),
+    (',', ','),
+    (';', ';'),
+];
+
+pub const MIDNUMLET: &'static [(char, char)] = &[
+    ('.', '.'),
+    ('‘', '’'),
+    ('․', '․'),
+    ('﹒', '﹒'),
+    (''', '''),
+    ('.', '.'),
+];
+
+pub const NEWLINE: &'static [(char, char)] =
+    &[('\u{b}', '\u{c}'), ('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
+
+pub const NUMERIC: &'static [(char, char)] = &[
+    ('0', '9'),
+    ('٠', '٩'),
+    ('٫', '٫'),
+    ('۰', '۹'),
+    ('߀', '߉'),
+    ('०', '९'),
+    ('০', '৯'),
+    ('੦', '੯'),
+    ('૦', '૯'),
+    ('୦', '୯'),
+    ('௦', '௯'),
+    ('౦', '౯'),
+    ('೦', '೯'),
+    ('൦', '൯'),
+    ('෦', '෯'),
+    ('๐', '๙'),
+    ('໐', '໙'),
+    ('༠', '༩'),
+    ('၀', '၉'),
+    ('႐', '႙'),
+    ('០', '៩'),
+    ('᠐', '᠙'),
+    ('᥆', '᥏'),
+    ('᧐', '᧙'),
+    ('᪀', '᪉'),
+    ('᪐', '᪙'),
+    ('᭐', '᭙'),
+    ('᮰', '᮹'),
+    ('᱀', '᱉'),
+    ('᱐', '᱙'),
+    ('꘠', '꘩'),
+    ('꣐', '꣙'),
+    ('꤀', '꤉'),
+    ('꧐', '꧙'),
+    ('꧰', '꧹'),
+    ('꩐', '꩙'),
+    ('꯰', '꯹'),
+    ('0', '9'),
+    ('𐒠', '𐒩'),
+    ('𐴰', '𐴹'),
+    ('𑁦', '𑁯'),
+    ('𑃰', '𑃹'),
+    ('𑄶', '𑄿'),
+    ('𑇐', '𑇙'),
+    ('𑋰', '𑋹'),
+    ('𑑐', '𑑙'),
+    ('𑓐', '𑓙'),
+    ('𑙐', '𑙙'),
+    ('𑛀', '𑛉'),
+    ('𑜰', '𑜹'),
+    ('𑣠', '𑣩'),
+    ('\u{11950}', '\u{11959}'),
+    ('𑱐', '𑱙'),
+    ('𑵐', '𑵙'),
+    ('𑶠', '𑶩'),
+    ('𖩠', '𖩩'),
+    ('𖭐', '𖭙'),
+    ('𝟎', '𝟿'),
+    ('𞅀', '𞅉'),
+    ('𞋰', '𞋹'),
+    ('𞥐', '𞥙'),
+    ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
+
+pub const SINGLE_QUOTE: &'static [(char, char)] = &[('\'', '\'')];
+
+pub const WSEGSPACE: &'static [(char, char)] = &[
+    (' ', ' '),
+    ('\u{1680}', '\u{1680}'),
+    ('\u{2000}', '\u{2006}'),
+    ('\u{2008}', '\u{200a}'),
+    ('\u{205f}', '\u{205f}'),
+    ('\u{3000}', '\u{3000}'),
+];
+
+pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/src/utf8.rs b/src/utf8.rs
new file mode 100644
index 0000000..489729e
--- /dev/null
+++ b/src/utf8.rs
@@ -0,0 +1,583 @@
+/*!
+Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes.
+
+This sub-module is useful for constructing byte-based automatons that need
+to embed UTF-8 decoding. The most common use of this module is in conjunction
+with the [`hir::ClassUnicodeRange`](../hir/struct.ClassUnicodeRange.html) type.
+
+See the documentation on the `Utf8Sequences` iterator for more details and
+an example.
+
+# Wait, what is this?
+
+This is simplest to explain with an example. Let's say you wanted to test
+whether a particular byte sequence was a Cyrillic character. One possible
+scalar value range is `[0400-04FF]`. The set of allowed bytes for this
+range can be expressed as a sequence of byte ranges:
+
+```ignore
+[D0-D3][80-BF]
+```
+
+This is simple enough: simply encode the boundaries, `0400` encodes to
+`D0 80` and `04FF` encodes to `D3 BF`, and create ranges from each
+corresponding pair of bytes: `D0` to `D3` and `80` to `BF`.
+
+However, what if you wanted to add the Cyrillic Supplementary characters to
+your range? Your range might then become `[0400-052F]`. The same procedure
+as above doesn't quite work because `052F` encodes to `D4 AF`. The byte ranges
+you'd get from the previous transformation would be `[D0-D4][80-AF]`. However,
+this isn't quite correct because this range doesn't capture many characters,
+for example, `04FF` (because its last byte, `BF`, isn't in the range `80-AF`).
+
+Instead, you need multiple sequences of byte ranges:
+
+```ignore
+[D0-D3][80-BF]  # matches codepoints 0400-04FF
+[D4][80-AF]     # matches codepoints 0500-052F
+```
+
+This gets even more complicated if you want bigger ranges, particularly if
+they naively contain surrogate codepoints. For example, the sequence of byte
+ranges for the basic multilingual plane (`[0000-FFFF]`) looks like this:
+
+```ignore
+[0-7F]
+[C2-DF][80-BF]
+[E0][A0-BF][80-BF]
+[E1-EC][80-BF][80-BF]
+[ED][80-9F][80-BF]
+[EE-EF][80-BF][80-BF]
+```
+
+Note that the byte ranges above will *not* match any erroneous encoding of
+UTF-8, including encodings of surrogate codepoints.
+
+And, of course, for all of Unicode (`[000000-10FFFF]`):
+
+```ignore
+[0-7F]
+[C2-DF][80-BF]
+[E0][A0-BF][80-BF]
+[E1-EC][80-BF][80-BF]
+[ED][80-9F][80-BF]
+[EE-EF][80-BF][80-BF]
+[F0][90-BF][80-BF][80-BF]
+[F1-F3][80-BF][80-BF][80-BF]
+[F4][80-8F][80-BF][80-BF]
+```
+
+This module automates the process of creating these byte ranges from ranges of
+Unicode scalar values.
+
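+As a minimal sketch, the two Cyrillic sequences shown earlier can be recovered
+with the `Utf8Sequences` iterator from this module:
+
+```rust
+use regex_syntax::utf8::Utf8Sequences;
+
+let seqs: Vec<String> = Utf8Sequences::new('\u{0400}', '\u{052F}')
+    .map(|seq| format!("{:?}", seq))
+    .collect();
+assert_eq!(seqs, vec!["[D0-D3][80-BF]", "[D4][80-AF]"]);
+```
+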
+# Lineage
+
+I got the idea and general implementation strategy from Russ Cox in his
+[article on regexps](https://web.archive.org/web/20160404141123/https://swtch.com/~rsc/regexp/regexp3.html) and RE2.
+Russ Cox got it from Ken Thompson's `grep` (no source, folklore?).
+I also got the idea from
+[Lucene](https://github.com/apache/lucene-solr/blob/ae93f4e7ac6a3908046391de35d4f50a0d3c59ca/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java),
+which uses it for executing automata on their term index.
+*/
+
+#![deny(missing_docs)]
+
+use std::char;
+use std::fmt;
+use std::slice;
+
+const MAX_UTF8_BYTES: usize = 4;
+
+/// Utf8Sequence represents a sequence of byte ranges.
+///
+/// To match a Utf8Sequence, a candidate byte sequence must match each
+/// successive range.
+///
+/// For example, if there are two ranges, `[C2-DF][80-BF]`, then the byte
+/// sequence `\xDD\x61` would not match because `0x61 < 0x80`.
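+///
+/// A minimal sketch of that example in code:
+///
+/// ```rust
+/// use regex_syntax::utf8::{Utf8Range, Utf8Sequence};
+///
+/// let seq = Utf8Sequence::Two([
+///     Utf8Range { start: 0xC2, end: 0xDF },
+///     Utf8Range { start: 0x80, end: 0xBF },
+/// ]);
+/// assert!(seq.matches(&[0xDD, 0x80]));
+/// // `0x61` falls outside `80-BF`, so this candidate does not match.
+/// assert!(!seq.matches(&[0xDD, 0x61]));
+/// ```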
+#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub enum Utf8Sequence {
+    /// One byte range.
+    One(Utf8Range),
+    /// Two successive byte ranges.
+    Two([Utf8Range; 2]),
+    /// Three successive byte ranges.
+    Three([Utf8Range; 3]),
+    /// Four successive byte ranges.
+    Four([Utf8Range; 4]),
+}
+
+impl Utf8Sequence {
+    /// Creates a new UTF-8 sequence from the encoded bytes of a scalar value
+    /// range.
+    ///
+    /// This assumes that `start` and `end` have the same length.
+    fn from_encoded_range(start: &[u8], end: &[u8]) -> Self {
+        assert_eq!(start.len(), end.len());
+        match start.len() {
+            2 => Utf8Sequence::Two([
+                Utf8Range::new(start[0], end[0]),
+                Utf8Range::new(start[1], end[1]),
+            ]),
+            3 => Utf8Sequence::Three([
+                Utf8Range::new(start[0], end[0]),
+                Utf8Range::new(start[1], end[1]),
+                Utf8Range::new(start[2], end[2]),
+            ]),
+            4 => Utf8Sequence::Four([
+                Utf8Range::new(start[0], end[0]),
+                Utf8Range::new(start[1], end[1]),
+                Utf8Range::new(start[2], end[2]),
+                Utf8Range::new(start[3], end[3]),
+            ]),
+            n => unreachable!("invalid encoded length: {}", n),
+        }
+    }
+
+    /// Returns the underlying sequence of byte ranges as a slice.
+    pub fn as_slice(&self) -> &[Utf8Range] {
+        use self::Utf8Sequence::*;
+        match *self {
+            One(ref r) => slice::from_ref(r),
+            Two(ref r) => &r[..],
+            Three(ref r) => &r[..],
+            Four(ref r) => &r[..],
+        }
+    }
+
+    /// Returns the number of byte ranges in this sequence.
+    ///
+    /// The length is guaranteed to be in the closed interval `[1, 4]`.
+    pub fn len(&self) -> usize {
+        self.as_slice().len()
+    }
+
+    /// Reverses the ranges in this sequence.
+    ///
+    /// For example, if this corresponds to the following sequence:
+    ///
+    /// ```ignore
+    /// [D0-D3][80-BF]
+    /// ```
+    ///
+    /// Then after reversal, it will be
+    ///
+    /// ```ignore
+    /// [80-BF][D0-D3]
+    /// ```
+    ///
+    /// This is useful when one is constructing a UTF-8 automaton to match
+    /// character classes in reverse.
+    pub fn reverse(&mut self) {
+        match *self {
+            Utf8Sequence::One(_) => {}
+            Utf8Sequence::Two(ref mut x) => x.reverse(),
+            Utf8Sequence::Three(ref mut x) => x.reverse(),
+            Utf8Sequence::Four(ref mut x) => x.reverse(),
+        }
+    }
+
+    /// Returns true if and only if a prefix of `bytes` matches this sequence
+    /// of byte ranges.
+    pub fn matches(&self, bytes: &[u8]) -> bool {
+        if bytes.len() < self.len() {
+            return false;
+        }
+        for (&b, r) in bytes.iter().zip(self) {
+            if !r.matches(b) {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+impl<'a> IntoIterator for &'a Utf8Sequence {
+    type IntoIter = slice::Iter<'a, Utf8Range>;
+    type Item = &'a Utf8Range;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.as_slice().into_iter()
+    }
+}
+
+impl fmt::Debug for Utf8Sequence {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Utf8Sequence::*;
+        match *self {
+            One(ref r) => write!(f, "{:?}", r),
+            Two(ref r) => write!(f, "{:?}{:?}", r[0], r[1]),
+            Three(ref r) => write!(f, "{:?}{:?}{:?}", r[0], r[1], r[2]),
+            Four(ref r) => {
+                write!(f, "{:?}{:?}{:?}{:?}", r[0], r[1], r[2], r[3])
+            }
+        }
+    }
+}
+
+/// A single inclusive range of UTF-8 bytes.
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Utf8Range {
+    /// Start of byte range (inclusive).
+    pub start: u8,
+    /// End of byte range (inclusive).
+    pub end: u8,
+}
+
+impl Utf8Range {
+    fn new(start: u8, end: u8) -> Self {
+        Utf8Range { start, end }
+    }
+
+    /// Returns true if and only if the given byte is in this range.
+    pub fn matches(&self, b: u8) -> bool {
+        self.start <= b && b <= self.end
+    }
+}
+
+impl fmt::Debug for Utf8Range {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.start == self.end {
+            write!(f, "[{:X}]", self.start)
+        } else {
+            write!(f, "[{:X}-{:X}]", self.start, self.end)
+        }
+    }
+}
+
+/// An iterator over ranges of matching UTF-8 byte sequences.
+///
+/// The iteration represents an alternation of comprehensive byte sequences
+/// that match precisely the set of UTF-8 encoded scalar values.
+///
+/// A byte sequence corresponds to one of the scalar values in the range given
+/// if and only if it completely matches exactly one of the sequences of byte
+/// ranges produced by this iterator.
+///
+/// Each sequence of byte ranges matches a unique set of bytes. That is, no two
+/// sequences will match the same bytes.
+///
+/// # Example
+///
+/// This shows how to match an arbitrary byte sequence against a range of
+/// scalar values.
+///
+/// ```rust
+/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence};
+///
+/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
+///     for range in seqs {
+///         if range.matches(bytes) {
+///             return true;
+///         }
+///     }
+///     false
+/// }
+///
+/// // Test the basic multilingual plane.
+/// let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect();
+///
+/// // UTF-8 encoding of 'a'.
+/// assert!(matches(&seqs, &[0x61]));
+/// // UTF-8 encoding of '☃' (`\u{2603}`).
+/// assert!(matches(&seqs, &[0xE2, 0x98, 0x83]));
+/// // UTF-8 encoding of `\u{10348}` (outside the BMP).
+/// assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88]));
+/// // Tries to match against a UTF-8 encoding of a surrogate codepoint,
+/// // which is invalid UTF-8, and therefore fails, despite the fact that
+/// // the corresponding codepoint (0xD800) falls in the range given.
+/// assert!(!matches(&seqs, &[0xED, 0xA0, 0x80]));
+/// // And fails against plain old invalid UTF-8.
+/// assert!(!matches(&seqs, &[0xFF, 0xFF]));
+/// ```
+///
+/// If this example seems circuitous, that's because it is! It's meant to be
+/// illustrative. In practice, you could just try to decode your byte sequence
+/// and compare it with the scalar value range directly. However, this is not
+always possible (for example, in a byte-based automaton).
+pub struct Utf8Sequences {
+    range_stack: Vec<ScalarRange>,
+}
+
+impl Utf8Sequences {
+    /// Create a new iterator over UTF-8 byte ranges for the scalar value range
+    /// given.
+    pub fn new(start: char, end: char) -> Self {
+        let mut it = Utf8Sequences { range_stack: vec![] };
+        it.push(start as u32, end as u32);
+        it
+    }
+
+    /// reset resets the scalar value range.
+    /// Any existing state is cleared, but resources may be reused.
+    ///
+    /// N.B. Benchmarks say that this method is dubious.
+    #[doc(hidden)]
+    pub fn reset(&mut self, start: char, end: char) {
+        self.range_stack.clear();
+        self.push(start as u32, end as u32);
+    }
+
+    fn push(&mut self, start: u32, end: u32) {
+        self.range_stack.push(ScalarRange { start, end });
+    }
+}
+
+struct ScalarRange {
+    start: u32,
+    end: u32,
+}
+
+impl fmt::Debug for ScalarRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "ScalarRange({:X}, {:X})", self.start, self.end)
+    }
+}
+
+impl Iterator for Utf8Sequences {
+    type Item = Utf8Sequence;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        'TOP: while let Some(mut r) = self.range_stack.pop() {
+            'INNER: loop {
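+                // Carve out the surrogate gap (U+D800..U+DFFF): `split` hands
+                // back the halves around it, and the upper half is pushed for
+                // a later iteration.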
+                if let Some((r1, r2)) = r.split() {
+                    self.push(r2.start, r2.end);
+                    r.start = r1.start;
+                    r.end = r1.end;
+                    continue 'INNER;
+                }
+                if !r.is_valid() {
+                    continue 'TOP;
+                }
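+                // Split at the boundaries between 1-, 2-, 3- and 4-byte UTF-8
+                // encodings so every scalar value left in `r` encodes to the
+                // same number of bytes.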
+                for i in 1..MAX_UTF8_BYTES {
+                    let max = max_scalar_value(i);
+                    if r.start <= max && max < r.end {
+                        self.push(max + 1, r.end);
+                        r.end = max;
+                        continue 'INNER;
+                    }
+                }
+                // An all-ASCII range is matched by a single one-byte range.
+                if let Some(ascii_range) = r.as_ascii() {
+                    return Some(Utf8Sequence::One(ascii_range));
+                }
+                // Where the endpoints disagree in their leading bytes,
+                // split the range so that its trailing continuation bytes
+                // cover their full 0x80-0xBF span; only then does the
+                // cross product of the per-byte ranges match exactly the
+                // scalar values in this range.
+                for i in 1..MAX_UTF8_BYTES {
+                    let m = (1 << (6 * i)) - 1;
+                    if (r.start & !m) != (r.end & !m) {
+                        if (r.start & m) != 0 {
+                            self.push((r.start | m) + 1, r.end);
+                            r.end = r.start | m;
+                            continue 'INNER;
+                        }
+                        if (r.end & m) != m {
+                            self.push(r.end & !m, r.end);
+                            r.end = (r.end & !m) - 1;
+                            continue 'INNER;
+                        }
+                    }
+                }
+                // The range is now aligned: encode both endpoints and emit
+                // one byte range per encoded byte position.
+                let mut start = [0; MAX_UTF8_BYTES];
+                let mut end = [0; MAX_UTF8_BYTES];
+                let n = r.encode(&mut start, &mut end);
+                return Some(Utf8Sequence::from_encoded_range(
+                    &start[0..n],
+                    &end[0..n],
+                ));
+            }
+        }
+        None
+    }
+}
+
+impl ScalarRange {
+    /// split splits this range if it overlaps with a surrogate codepoint.
+    ///
+    /// Either or both ranges may be invalid.
+    fn split(&self) -> Option<(ScalarRange, ScalarRange)> {
+        if self.start < 0xE000 && self.end > 0xD7FF {
+            Some((
+                ScalarRange { start: self.start, end: 0xD7FF },
+                ScalarRange { start: 0xE000, end: self.end },
+            ))
+        } else {
+            None
+        }
+    }
+
+    /// is_valid returns true if and only if start <= end.
+    fn is_valid(&self) -> bool {
+        self.start <= self.end
+    }
+
+    /// as_ascii returns this range as a Utf8Range if and only if all scalar
+    /// values in this range can be encoded as a single byte.
+    fn as_ascii(&self) -> Option<Utf8Range> {
+        if self.is_ascii() {
+            Some(Utf8Range::new(self.start as u8, self.end as u8))
+        } else {
+            None
+        }
+    }
+
+    /// is_ascii returns true if the range is ASCII only (i.e., takes a single
+    /// byte to encode any scalar value).
+    fn is_ascii(&self) -> bool {
+        self.is_valid() && self.end <= 0x7f
+    }
+
+    /// encode writes the UTF-8 encoding of the start and end of this range
+    /// to the corresponding destination slices, and returns the number of
+    /// bytes written.
+    ///
+    /// The slices should have room for at least `MAX_UTF8_BYTES`.
+    fn encode(&self, start: &mut [u8], end: &mut [u8]) -> usize {
+        let cs = char::from_u32(self.start).unwrap();
+        let ce = char::from_u32(self.end).unwrap();
+        let ss = cs.encode_utf8(start);
+        let se = ce.encode_utf8(end);
+        assert_eq!(ss.len(), se.len());
+        ss.len()
+    }
+}
+
+/// Returns the largest Unicode scalar value whose UTF-8 encoding uses
+/// `nbytes` bytes.
+fn max_scalar_value(nbytes: usize) -> u32 {
+    match nbytes {
+        1 => 0x007F,
+        2 => 0x07FF,
+        3 => 0xFFFF,
+        4 => 0x10FFFF,
+        _ => unreachable!("invalid UTF-8 byte sequence size"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::char;
+
+    use utf8::{Utf8Range, Utf8Sequences};
+
+    fn rutf8(s: u8, e: u8) -> Utf8Range {
+        Utf8Range::new(s, e)
+    }
+
+    fn never_accepts_surrogate_codepoints(start: char, end: char) {
+        for cp in 0xD800..0xE000 {
+            let buf = encode_surrogate(cp);
+            for r in Utf8Sequences::new(start, end) {
+                if r.matches(&buf) {
+                    panic!(
+                        "Sequence ({:X}, {:X}) contains range {:?}, \
+                         which matches surrogate code point {:X} \
+                         with encoded bytes {:?}",
+                        start as u32, end as u32, r, cp, buf,
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn codepoints_no_surrogates() {
+        never_accepts_surrogate_codepoints('\u{0}', '\u{FFFF}');
+        never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFF}');
+        never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFE}');
+        never_accepts_surrogate_codepoints('\u{80}', '\u{10FFFF}');
+        never_accepts_surrogate_codepoints('\u{D7FF}', '\u{E000}');
+    }
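+
+    // Illustrative test added during import (not from upstream): a range
+    // that spans the surrogate gap splits into exactly two sequences, the
+    // UTF-8 encodings of U+D7FF (ED 9F BF) and U+E000 (EE 80 80).
+    #[test]
+    fn surrogate_gap_split() {
+        use utf8::Utf8Sequence::*;
+
+        let seqs: Vec<_> =
+            Utf8Sequences::new('\u{D7FF}', '\u{E000}').collect();
+        assert_eq!(
+            seqs,
+            vec![
+                Three([
+                    rutf8(0xED, 0xED),
+                    rutf8(0x9F, 0x9F),
+                    rutf8(0xBF, 0xBF)
+                ]),
+                Three([
+                    rutf8(0xEE, 0xEE),
+                    rutf8(0x80, 0x80),
+                    rutf8(0x80, 0x80)
+                ]),
+            ]
+        );
+    }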
+
+    #[test]
+    fn single_codepoint_one_sequence() {
+        // Tests that every range of scalar values that contains a single
+        // scalar value is recognized by one sequence of byte ranges.
+        for i in 0x0..(0x10FFFF + 1) {
+            let c = match char::from_u32(i) {
+                None => continue,
+                Some(c) => c,
+            };
+            let seqs: Vec<_> = Utf8Sequences::new(c, c).collect();
+            assert_eq!(seqs.len(), 1);
+        }
+    }
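+
+    // Illustrative test added during import (not from upstream): a range
+    // that straddles the one-/two-byte encoding boundary is split at
+    // U+007F, yielding the full ASCII range followed by the full two-byte
+    // range (lead bytes C2-DF, since C0 and C1 would be overlong).
+    #[test]
+    fn one_and_two_byte_boundary() {
+        use utf8::Utf8Sequence::*;
+
+        let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{7FF}').collect();
+        assert_eq!(
+            seqs,
+            vec![
+                One(rutf8(0x0, 0x7F)),
+                Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]),
+            ]
+        );
+    }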
+
+    #[test]
+    fn bmp() {
+        use utf8::Utf8Sequence::*;
+
+        let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::<Vec<_>>();
+        assert_eq!(
+            seqs,
+            vec![
+                One(rutf8(0x0, 0x7F)),
+                Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]),
+                Three([
+                    rutf8(0xE0, 0xE0),
+                    rutf8(0xA0, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+                Three([
+                    rutf8(0xE1, 0xEC),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+                Three([
+                    rutf8(0xED, 0xED),
+                    rutf8(0x80, 0x9F),
+                    rutf8(0x80, 0xBF)
+                ]),
+                Three([
+                    rutf8(0xEE, 0xEF),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+            ]
+        );
+    }
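+
+    // Illustrative test added during import (not from upstream): the
+    // astral planes (U+10000..=U+10FFFF) decompose into the three
+    // four-byte sequences that mirror the UTF8-4 production of RFC 3629.
+    #[test]
+    fn astral() {
+        use utf8::Utf8Sequence::*;
+
+        let seqs: Vec<_> =
+            Utf8Sequences::new('\u{10000}', '\u{10FFFF}').collect();
+        assert_eq!(
+            seqs,
+            vec![
+                Four([
+                    rutf8(0xF0, 0xF0),
+                    rutf8(0x90, 0xBF),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+                Four([
+                    rutf8(0xF1, 0xF3),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+                Four([
+                    rutf8(0xF4, 0xF4),
+                    rutf8(0x80, 0x8F),
+                    rutf8(0x80, 0xBF),
+                    rutf8(0x80, 0xBF)
+                ]),
+            ]
+        );
+    }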
+
+    #[test]
+    fn reverse() {
+        use utf8::Utf8Sequence::*;
+
+        let mut s = One(rutf8(0xA, 0xB));
+        s.reverse();
+        assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]);
+
+        let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
+        s.reverse();
+        assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]);
+
+        let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
+        s.reverse();
+        assert_eq!(
+            s.as_slice(),
+            &[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)]
+        );
+
+        let mut s = Four([
+            rutf8(0xA, 0xB),
+            rutf8(0xB, 0xC),
+            rutf8(0xC, 0xD),
+            rutf8(0xD, 0xE),
+        ]);
+        s.reverse();
+        assert_eq!(
+            s.as_slice(),
+            &[
+                rutf8(0xD, 0xE),
+                rutf8(0xC, 0xD),
+                rutf8(0xB, 0xC),
+                rutf8(0xA, 0xB)
+            ]
+        );
+    }
+
+    // Hand-rolls the 3-byte UTF-8 bit pattern for a surrogate code point,
+    // since standard encoders (correctly) refuse to encode surrogates.
+    fn encode_surrogate(cp: u32) -> [u8; 3] {
+        const TAG_CONT: u8 = 0b1000_0000;
+        const TAG_THREE_B: u8 = 0b1110_0000;
+
+        assert!(0xD800 <= cp && cp < 0xE000);
+        let mut dst = [0; 3];
+        dst[0] = (cp >> 12 & 0x0F) as u8 | TAG_THREE_B;
+        dst[1] = (cp >> 6 & 0x3F) as u8 | TAG_CONT;
+        dst[2] = (cp & 0x3F) as u8 | TAG_CONT;
+        dst
+    }
+}
diff --git a/test b/test
new file mode 100755
index 0000000..9970a99
--- /dev/null
+++ b/test
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This is a convenience script for running a broad swath of the syntax tests.
+echo "===== DEFAULT FEATURES ==="
+cargo test
+
+features=(
+    unicode
+    unicode-age
+    unicode-bool
+    unicode-case
+    unicode-gencat
+    unicode-perl
+    unicode-script
+    unicode-segment
+)
+for f in "${features[@]}"; do
+    echo "===== FEATURE: $f ==="
+    cargo test --no-default-features --features "$f"
+done