diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99b7437eac..226d34b1f2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,37 @@
+TBD
+===
+This release includes a ground-up rewrite of the regex-syntax crate, which has
+been in development for over a year.
+
+New features:
+
+* Error messages for invalid regexes have been greatly improved. You get these
+  automatically; you don't need to do anything. In addition to better
+  formatting, error messages will now explicitly call out the use of look
+  around. When regex 1.0 is released, this will happen for backreferences as
+  well.
+* Full support for intersection, difference and symmetric difference of
+  character classes. These can be used via the `&&`, `--` and `~~` binary
+  operators within classes.
+* A Unicode Level 1 conformant implementation of `\p{..}` character classes.
+  Things like `\p{scx:Hira}`, `\p{age:3.2}` or `\p{Changes_When_Casefolded}`
+  now work. All property name and value aliases are supported, and properties
+  are selected via loose matching. e.g., `\p{Greek}` is the same as
+  `\p{G r E e K}`.
+* A new `UNICODE.md` document has been added to this repository that
+  exhaustively documents support for UTS#18.
+* Empty sub-expressions are now permitted in most places. That is, `()+` is
+  now a valid regex.
+* Almost everything in regex-syntax now uses constant stack space, even when
+  performing analysis that requires structural induction. This reduces the risk
+  of a user provided regular expression causing a stack overflow.
+* [FEATURE #174](https://github.com/rust-lang/regex/issues/174):
+  The `Ast` type in `regex-syntax` now contains span information.
+* [FEATURE #424](https://github.com/rust-lang/regex/issues/424):
+  Support `\u`, `\u{...}`, `\U` and `\U{...}` syntax for specifying code points
+  in a regular expression.
+
+
 0.2.6 (2018-02-08)
 ==================
 Bug fixes:
diff --git a/Cargo.toml b/Cargo.toml
index 92ddcc9872..743bb0a48d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,9 @@ travis-ci = { repository = "rust-lang/regex" }
 appveyor = { repository = "rust-lang-libs/regex" }
 
 [workspace]
-members = ["bench", "regex-capi", "regex-debug", "regex-syntax"]
+members = [
+  "bench", "regex-capi", "regex-debug", "regex-syntax",
+]
 
 [dependencies]
 # For very fast prefix literal matching.
@@ -28,7 +30,7 @@ memchr = "2.0.0"
 # For managing regex caches quickly across multiple threads.
 thread_local = "0.3.2"
 # For parsing regular expressions.
-regex-syntax = { path = "regex-syntax", version = "0.4.1" }
+regex-syntax = { path = "regex-syntax", version = "0.5.0" }
 # For accelerating text search.
 simd = { version = "0.2.1", optional = true }
 # For compiling UTF-8 decoding into automata.
diff --git a/README.md b/README.md
index b4b6501886..91dd968c0b 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,8 @@ by [RE2](https://github.com/google/re2).
 ### Documentation
 
 [Module documentation with examples](https://docs.rs/regex).
-The module documentation also include a comprehensive description of the syntax
-supported.
+The module documentation also includes a comprehensive description of the
+syntax supported.
 
 Documentation with examples for the various matching functions and iterators
 can be found on the
@@ -192,12 +192,13 @@ assert!(matches.matched(6));
 ### Usage: a regular expression parser
 
 This repository contains a crate that provides a well tested regular expression
-parser and abstract syntax. It provides no facilities for compilation or
-execution. This may be useful if you're implementing your own regex engine or
-otherwise need to do analysis on the syntax of a regular expression. It is
-otherwise not recommended for general use.
+parser, abstract syntax and a high-level intermediate representation for
+convenient analysis. It provides no facilities for compilation or execution.
+This may be useful if you're implementing your own regex engine or otherwise
+need to do analysis on the syntax of a regular expression. It is otherwise not
+recommended for general use.
 
-[Documentation for `regex-syntax` with examples](https://docs.rs/regex-syntax).
+[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax)
 
 # License
 
diff --git a/UNICODE.md b/UNICODE.md
new file mode 100644
index 0000000000..0a3b783ae4
--- /dev/null
+++ b/UNICODE.md
@@ -0,0 +1,250 @@
+# Unicode conformance
+
+This document describes the regex crate's conformance to Unicode's
+[UTS#18](http://unicode.org/reports/tr18/)
+report, which lays out 3 levels of support: Basic, Extended and Tailored.
+
+Full support for Level 1 ("Basic Unicode Support") is provided with two
+exceptions:
+
+1. Line boundaries are not Unicode aware. Namely, only the `\n`
+   (`END OF LINE`) character is recognized as a line boundary.
+2. The compatibility properties specified by
+   [RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+   are ASCII-only definitions.
+
+Little to no support is provided for either Level 2 or Level 3. For the most
+part, this is because the features are either complex/hard to implement, or at
+the very least, very difficult to implement without sacrificing performance.
+For example, tackling canonical equivalence such that matching worked as one
+would expect regardless of normalization form would be a significant
+undertaking. This is at least partially a result of the fact that this regex
+engine is based on finite automata, which admits less of the flexibility
+normally associated with backtracking implementations.
+
+
+## RL1.1 Hex Notation
+
+[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation)
+
+Hex Notation refers to the ability to specify a Unicode code point in a regular
+expression via its hexadecimal code point representation. This is useful in
+environments that have poor Unicode font rendering or if you need to express a
+code point that is not normally displayable. All forms of hexadecimal notation
+are supported:
+
+    \x7F        hex character code (exactly two digits)
+    \x{10FFFF}  any hex character code corresponding to a Unicode code point
+    \u007F      hex character code (exactly four digits)
+    \u{7F}      any hex character code corresponding to a Unicode code point
+    \U0000007F  hex character code (exactly eight digits)
+    \U{7F}      any hex character code corresponding to a Unicode code point
+
+Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways
+of expressing hexadecimal code points. Any number of digits can be written
+within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all
+fixed-width variants of the same idea.
+
+Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is
+banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode
+mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint
+U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches
+the literal byte `\xFF`.
+
+
+## RL1.2 Properties
+
+[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories)
+
+Full support for Unicode property syntax is provided. Unicode properties
+provide a convenient way to construct character classes of groups of code
+points specified by Unicode. The regex crate does not support every Unicode
+property, but covers a useful subset. In particular:
+
+* [General categories](http://unicode.org/reports/tr18/#General_Category_Property)
+* [Scripts and Script Extensions](http://unicode.org/reports/tr18/#Script_Property)
+* [Age](http://unicode.org/reports/tr18/#Age)
+* A smattering of boolean properties, including all of those specified by
+  [RL1.2](http://unicode.org/reports/tr18/#RL1.2) explicitly.
+
+In all cases, property name and value abbreviations are supported, and all
+names/values are matched loosely without regard for case, whitespace or
+underscores. Property name aliases can be found in Unicode's
+[`PropertyAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
+file, while property value aliases can be found in Unicode's
+[`PropertyValueAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
+file.
+
+The syntax supported is also consistent with the UTS#18 recommendation:
+
+* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow:
+  `\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`,
+  `\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and
+  `Script_Extensions` (or `scx` for short).
+* `\p{age:3.2}` selects all code points assigned as of Unicode 3.2.
+* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated
+  via `\p{alpha}` (for example).
+* Single letter variants for properties with single letter abbreviations.
+  For example, `\p{Letter}` can be equivalently written as `\pL`.
+
+The following is a list of all properties supported by the regex crate (starred
+properties correspond to properties required by RL1.2):
+
+* `General_Category` \* (including `Any`, `ASCII` and `Assigned`)
+* `Script` \*
+* `Script_Extensions` \*
+* `Age`
+* `ASCII_Hex_Digit`
+* `Alphabetic` \*
+* `Bidi_Control`
+* `Case_Ignorable`
+* `Cased`
+* `Changes_When_Casefolded`
+* `Changes_When_Casemapped`
+* `Changes_When_Lowercased`
+* `Changes_When_Titlecased`
+* `Changes_When_Uppercased`
+* `Dash`
+* `Default_Ignorable_Code_Point` \*
+* `Deprecated`
+* `Diacritic`
+* `Extender`
+* `Grapheme_Base`
+* `Grapheme_Extend`
+* `Hex_Digit`
+* `IDS_Binary_Operator`
+* `IDS_Trinary_Operator`
+* `ID_Continue`
+* `ID_Start`
+* `Join_Control`
+* `Logical_Order_Exception`
+* `Lowercase` \*
+* `Math`
+* `Noncharacter_Code_Point` \*
+* `Pattern_Syntax`
+* `Pattern_White_Space`
+* `Prepended_Concatenation_Mark`
+* `Quotation_Mark`
+* `Radical`
+* `Regional_Indicator`
+* `Sentence_Terminal`
+* `Soft_Dotted`
+* `Terminal_Punctuation`
+* `Unified_Ideograph`
+* `Uppercase` \*
+* `Variation_Selector`
+* `White_Space` \*
+* `XID_Continue`
+* `XID_Start`
+
+
+## RL1.2a Compatibility Properties
+
+[UTS#18 RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+
+The regex crate only provides ASCII definitions of the
+[compatibility properties documented in UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties)
+(sans the `\X` class, for matching grapheme clusters, which isn't provided
+at all). This is because it seems to be consistent with most other regular
+expression engines, and in particular, because these are often referred to as
+"ASCII" or "POSIX" character classes.
+
+Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware.
+Their traditional ASCII definition can be used by disabling Unicode. That is,
+`[[:word:]]` and `(?-u)\w` are equivalent.
+
+
+## RL1.3 Subtraction and Intersection
+
+[UTS#18 RL1.3](http://unicode.org/reports/tr18/#Subtraction_and_Intersection)
+
+The regex crate provides full support for nested character classes, along with
+union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
+operations on arbitrary character classes.
+
+For example, to match all non-ASCII letters, you could use either
+`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]`
+(intersecting the negation).
+
+
+## RL1.4 Simple Word Boundaries
+
+[UTS#18 RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+
+The regex crate provides basic Unicode aware word boundary assertions. A word
+boundary assertion can be written as `\b`, or `\B` as its negation. A word
+boundary assertion corresponds to a zero-width match, where its adjacent
+characters correspond to word and non-word, or non-word and word characters.
+
+Conformance in this case chooses to define word character in the same way that
+the `\w` character class is defined: a code point that is a member of one of
+the following classes:
+
+* `\p{Alphabetic}`
+* `\p{Join_Control}`
+* `\p{gc:Mark}`
+* `\p{gc:Decimal_Number}`
+* `\p{gc:Connector_Punctuation}`
+
+In particular, this differs slightly from the
+[prescription given in RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+but is permissible according to
+[UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
+one another.
+
+Finally, Unicode word boundaries can be disabled, which will cause ASCII word
+boundaries to be used instead. That is, `\b` is a Unicode word boundary while
+`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial
+if performance is important, since the implementation of Unicode word
+boundaries is currently sub-optimal on non-ASCII text.
+
+
+## RL1.5 Simple Loose Matches
+
+[UTS#18 RL1.5](http://unicode.org/reports/tr18/#Simple_Loose_Matches)
+
+The regex crate provides full support for case insensitive matching in
+accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
+"simple" mapping was chosen because of a key convenient property: every
+"simple" mapping is a mapping from exactly one code point to exactly one other
+code point. This makes case insensitive matching of character classes, for
+example, straight-forward to implement.
+
+When case insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`),
+then all character classes are case folded as well.
+
+
+## RL1.6 Line Boundaries
+
+[UTS#18 RL1.6](http://unicode.org/reports/tr18/#Line_Boundaries)
+
+The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
+character as a line boundary. This choice was made mostly for implementation
+convenience, and to avoid performance cliffs that Unicode word boundaries are
+subject to.
+
+Ideally, it would be nice to at least support `\r\n` as a line boundary as
+well, and in theory, this could be done efficiently.
+
+
+## RL1.7 Code Points
+
+[UTS#18 RL1.7](http://unicode.org/reports/tr18/#Supplementary_Characters)
+
+The regex crate provides full support for Unicode code point matching. Namely,
+the fundamental atom of any match is always a single code point.
+
+Given Rust's strong ties to UTF-8, the following guarantees are also provided:
+
+* All matches are reported on valid UTF-8 code unit boundaries. That is, any
+  match range returned by the public regex API is guaranteed to successfully
+  slice the string that was searched.
+* By consequence of the above, it is impossible to match surrogate code points.
+  No support for UTF-16 is provided, so this is never necessary.
+
+Note that when Unicode mode is disabled, the fundamental atom of matching is
+no longer a code point but a single byte. When Unicode mode is disabled, many
+Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid
+regex but `\pL(?-u)\xFF` (matches any Unicode `Letter` followed by the literal
+byte `\xFF`) is.
diff --git a/bench/Cargo.toml b/bench/Cargo.toml
index c6248fb0d6..7a3eb8f8ff 100644
--- a/bench/Cargo.toml
+++ b/bench/Cargo.toml
@@ -19,7 +19,7 @@ onig = { version = "3", optional = true }
 libpcre-sys = { version = "0.2", optional = true }
 memmap = "0.6"
 regex = { version = "0.2.0", path = "..", features = ["simd-accel"] }
-regex-syntax = { version = "0.4.0", path = "../regex-syntax" }
+regex-syntax = { version = "0.5.0", path = "../regex-syntax" }
 serde = "1"
 serde_derive = "1"
 
diff --git a/bench/src/rust_compile.rs b/bench/src/rust_compile.rs
index 9a89981fa0..943a091f17 100644
--- a/bench/src/rust_compile.rs
+++ b/bench/src/rust_compile.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use regex_syntax::Expr;
+use regex_syntax::Parser;
 use test::Bencher;
 
 use regex::internal::Compiler;
@@ -16,7 +16,7 @@ use regex::internal::Compiler;
 #[bench]
 fn compile_simple(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"^bc(d|e)*$").unwrap();
+        let re = Parser::new().parse(r"^bc(d|e)*$").unwrap();
         Compiler::new().compile(&[re]).unwrap()
     });
 }
@@ -24,7 +24,7 @@ fn compile_simple(b: &mut Bencher) {
 #[bench]
 fn compile_simple_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"^bc(d|e)*$").unwrap();
+        let re = Parser::new().parse(r"^bc(d|e)*$").unwrap();
         Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
@@ -39,7 +39,7 @@ fn compile_simple_full(b: &mut Bencher) {
 #[bench]
 fn compile_small(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
+        let re = Parser::new().parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
         Compiler::new().compile(&[re]).unwrap()
     });
 }
@@ -47,7 +47,7 @@ fn compile_small(b: &mut Bencher) {
 #[bench]
 fn compile_small_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
+        let re = Parser::new().parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
         Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
@@ -62,7 +62,7 @@ fn compile_small_full(b: &mut Bencher) {
 #[bench]
 fn compile_huge(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"\p{L}{100}").unwrap();
+        let re = Parser::new().parse(r"\p{L}{100}").unwrap();
         Compiler::new().compile(&[re]).unwrap()
     });
 }
@@ -70,7 +70,7 @@ fn compile_huge(b: &mut Bencher) {
 #[bench]
 fn compile_huge_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = Expr::parse(r"\p{L}{100}").unwrap();
+        let re = Parser::new().parse(r"\p{L}{100}").unwrap();
         Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
diff --git a/regex-capi/ctest/test.c b/regex-capi/ctest/test.c
index 2df4945837..1dc6565f60 100644
--- a/regex-capi/ctest/test.c
+++ b/regex-capi/ctest/test.c
@@ -303,7 +303,7 @@ bool test_compile_error() {
         rure_free(re);
     }
     const char *msg = rure_error_message(err);
-    if (NULL == strstr(msg, "Unclosed parenthesis")) {
+    if (NULL == strstr(msg, "unclosed group")) {
         if (DEBUG) {
             fprintf(stderr,
                     "[test_compile_error] "
diff --git a/regex-debug/Cargo.toml b/regex-debug/Cargo.toml
index 54b2bb511d..3c8f70f0e2 100644
--- a/regex-debug/Cargo.toml
+++ b/regex-debug/Cargo.toml
@@ -13,6 +13,7 @@ workspace = ".."
 [dependencies]
 docopt = "0.8"
 regex = { version = "0.2", path = ".." }
-regex-syntax = { version = "0.4.0", path = "../regex-syntax" }
+regex-syntax = { version = "0.5.0", path = "../regex-syntax" }
 serde = "1"
 serde_derive = "1"
+utf8-ranges = "1"
diff --git a/regex-debug/src/main.rs b/regex-debug/src/main.rs
index f31dc22a9c..64b55a5906 100644
--- a/regex-debug/src/main.rs
+++ b/regex-debug/src/main.rs
@@ -4,6 +4,7 @@ extern crate regex_syntax as syntax;
 extern crate serde;
 #[macro_use]
 extern crate serde_derive;
+extern crate utf8_ranges;
 
 use std::error;
 use std::io::{self, Write};
@@ -11,17 +12,20 @@ use std::process;
 use std::result;
 
 use docopt::Docopt;
+use syntax::hir::Hir;
+use syntax::hir::literal::Literals;
 use regex::internal::{Compiler, LiteralSearcher};
-use syntax::{ExprBuilder, Expr, Literals};
 
 const USAGE: &'static str = "
 Usage:
     regex-debug [options] ast <pattern>
+    regex-debug [options] hir <pattern>
     regex-debug [options] prefixes <patterns> ...
     regex-debug [options] suffixes <patterns> ...
     regex-debug [options] anchors <pattern>
     regex-debug [options] captures <pattern>
     regex-debug [options] compile <patterns> ...
+    regex-debug [options] utf8-ranges <class>
     regex-debug --help
 
 Options:
@@ -51,14 +55,17 @@ Options:
 #[derive(Deserialize)]
 struct Args {
     cmd_ast: bool,
+    cmd_hir: bool,
     cmd_prefixes: bool,
     cmd_suffixes: bool,
     cmd_anchors: bool,
     cmd_captures: bool,
     cmd_compile: bool,
+    cmd_utf8_ranges: bool,
 
     arg_pattern: String,
     arg_patterns: Vec<String>,
+    arg_class: String,
 
     flag_size_limit: usize,
     flag_bytes: bool,
@@ -93,6 +100,8 @@ fn main() {
 fn run(args: &Args) -> Result<()> {
     if args.cmd_ast {
         cmd_ast(args)
+    } else if args.cmd_hir {
+        cmd_hir(args)
     } else if args.cmd_prefixes {
         cmd_literals(args)
     } else if args.cmd_suffixes {
@@ -103,13 +112,30 @@ fn run(args: &Args) -> Result<()> {
         cmd_captures(args)
     } else if args.cmd_compile {
         cmd_compile(args)
+    } else if args.cmd_utf8_ranges {
+        cmd_utf8_ranges(args)
     } else {
         unreachable!()
     }
 }
 
 fn cmd_ast(args: &Args) -> Result<()> {
-    println!("{:#?}", try!(args.parse_one()));
+    use syntax::ast::parse::Parser;
+
+    let mut parser = Parser::new();
+    let ast = try!(parser.parse(&args.arg_pattern));
+    println!("{:#?}", ast);
+    Ok(())
+}
+
+fn cmd_hir(args: &Args) -> Result<()> {
+    use syntax::ParserBuilder;
+
+    let mut parser = ParserBuilder::new()
+        .allow_invalid_utf8(false)
+        .build();
+    let hir = try!(parser.parse(&args.arg_pattern));
+    println!("{:#?}", hir);
     Ok(())
 }
 
@@ -182,18 +208,49 @@ fn cmd_compile(args: &Args) -> Result<()> {
     Ok(())
 }
 
+fn cmd_utf8_ranges(args: &Args) -> Result<()> {
+    use syntax::ParserBuilder;
+    use syntax::hir::{self, HirKind};
+    use utf8_ranges::Utf8Sequences;
+
+    let hir = try!(ParserBuilder::new()
+        .build()
+        .parse(&format!("[{}]", args.arg_class)));
+    let cls = match hir.into_kind() {
+        HirKind::Class(hir::Class::Unicode(cls)) => cls,
+        _ => return Err(
+            format!("unexpected HIR, expected Unicode class").into(),
+        ),
+    };
+    for (i, range) in cls.iter().enumerate() {
+        if i > 0 {
+            println!("----------------------------");
+        }
+        for seq in Utf8Sequences::new(range.start(), range.end()) {
+            for (i, utf8_range) in seq.into_iter().enumerate() {
+                if i > 0 {
+                    print!("|");
+                }
+                print!("[{:02X}-{:02X}]", utf8_range.start, utf8_range.end);
+            }
+            println!();
+        }
+    }
+    Ok(())
+}
+
 impl Args {
-    fn parse_one(&self) -> Result<Expr> {
+    fn parse_one(&self) -> Result<Hir> {
         parse(&self.arg_pattern)
     }
 
-    fn parse_many(&self) -> Result<Vec<Expr>> {
+    fn parse_many(&self) -> Result<Vec<Hir>> {
         self.arg_patterns.iter().map(|s| parse(s)).collect()
     }
 
-    fn literals<F: Fn(&mut Literals, &Expr) -> bool>(
+    fn literals<F: Fn(&mut Literals, &Hir) -> bool>(
         &self,
-        exprs: &[Expr],
+        exprs: &[Hir],
         get_literals: F,
     ) -> Literals {
         let mut lits = Some(self.empty_literals());
@@ -221,8 +278,13 @@ impl Args {
     }
 }
 
-fn parse(re: &str) -> Result<Expr> {
-    ExprBuilder::new().allow_bytes(true).parse(re).map_err(From::from)
+fn parse(re: &str) -> Result<Hir> {
+    use syntax::ParserBuilder;
+    ParserBuilder::new()
+        .allow_invalid_utf8(true)
+        .build()
+        .parse(re)
+        .map_err(From::from)
 }
 
 fn escape_unicode(bytes: &[u8]) -> String {
diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml
index 6418a73bf6..97473f7b0c 100644
--- a/regex-syntax/Cargo.toml
+++ b/regex-syntax/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "regex-syntax"
-version = "0.4.2"  #:version
+version = "0.5.0"  #:version
 authors = ["The Rust Project Developers"]
 license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-lang/regex"
@@ -9,6 +9,5 @@ homepage = "https://github.com/rust-lang/regex"
 description = "A regular expression parser."
 workspace = ".."
 
-[dev-dependencies]
-quickcheck = { version = "0.6", default-features = false }
-rand = "0.4"
+[dependencies]
+ucd-util = "0.1.0"
diff --git a/regex-syntax/benches/bench.rs b/regex-syntax/benches/bench.rs
new file mode 100644
index 0000000000..60aea9e984
--- /dev/null
+++ b/regex-syntax/benches/bench.rs
@@ -0,0 +1,73 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(test)]
+
+extern crate regex_syntax;
+extern crate test;
+
+use regex_syntax::Parser;
+use test::Bencher;
+
+#[bench]
+fn parse_simple1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"^bc(d|e)*$";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_simple2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"'[a-zA-Z_][a-zA-Z0-9_]*(')\b";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_small1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}|\p{N}|\s|.|\d";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\pL\p{Greek}\p{Hiragana}\p{Alphabetic}\p{Hebrew}\p{Arabic}";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\s\S\w\W\d\D";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium3(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{age:3.2}\p{hira}\p{scx:hira}\p{alphabetic}\p{sc:Greek}\pL";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_huge(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}{100}";
+        Parser::new().parse(re).unwrap()
+    });
+}
diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs
new file mode 100644
index 0000000000..ad63a1b491
--- /dev/null
+++ b/regex-syntax/src/ast/mod.rs
@@ -0,0 +1,1507 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines an abstract syntax for regular expressions.
+*/
+
+use std::cmp::Ordering;
+use std::error;
+use std::fmt;
+
+pub use ast::visitor::{Visitor, visit};
+
+pub mod parse;
+pub mod print;
+mod visitor;
+
+/// An error that occurred while parsing a regular expression into an abstract
+/// syntax tree.
+///
+/// Note that not all ASTs represent a valid regular expression. For example,
+/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
+/// valid Unicode property name. That particular error is reported when
+/// translating an AST to the high-level intermediate representation (`HIR`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error.
+    kind: ErrorKind,
+    /// The original pattern that the parser generated the error from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error.
+    span: Span,
+}
+
+impl Error {
+    /// Return the type of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+
+    /// Return an auxiliary span. This span exists only for some errors that
+    /// benefit from being able to point to two locations in the original
+    /// regular expression. For example, "duplicate" errors will have the
+    /// main error position set to the duplicate occurrence while its
+    /// auxiliary span will be set to the initial occurrence.
+    pub fn auxiliary_span(&self) -> Option<&Span> {
+        use self::ErrorKind::*;
+        match self.kind {
+            FlagDuplicate { ref original } => Some(original),
+            FlagRepeatedNegation { ref original, .. } => Some(original),
+            GroupNameDuplicate { ref original, .. } => Some(original),
+            _ => None,
+        }
+    }
+}
+
+/// The type of an error that occurred while building an AST.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// The capturing group limit was exceeded.
+    ///
+    /// Note that this represents a limit on the total number of capturing
+    /// groups in a regex and not necessarily the number of nested capturing
+    /// groups. That is, the nest limit can be low and it is still possible for
+    /// this error to occur.
+    CaptureLimitExceeded,
+    /// An invalid escape sequence was found in a character class set.
+    ClassEscapeInvalid,
+    /// An invalid character class range was found. An invalid range is any
+    /// range where the start is greater than the end.
+    ClassRangeInvalid,
+    /// An opening `[` was found with no corresponding closing `]`.
+    ClassUnclosed,
+    /// An empty decimal number was given where one was expected.
+    DecimalEmpty,
+    /// An invalid decimal number was given where one was expected.
+    DecimalInvalid,
+    /// A bracketed hex literal was empty.
+    EscapeHexEmpty,
+    /// A bracketed hex literal did not correspond to a Unicode scalar value.
+    EscapeHexInvalid,
+    /// An invalid hexadecimal digit was found.
+    EscapeHexInvalidDigit,
+    /// EOF was found before an escape sequence was completed.
+    EscapeUnexpectedEof,
+    /// An unrecognized escape sequence.
+    EscapeUnrecognized,
+    /// A dangling negation was used when setting flags, e.g., `i-`.
+    FlagDanglingNegation,
+    /// A flag was used twice, e.g., `i-i`.
+    FlagDuplicate {
+        /// The position of the original flag. The error position
+        /// points to the duplicate flag.
+        original: Span,
+    },
+    /// The negation operator was used twice, e.g., `-i-s`.
+    FlagRepeatedNegation {
+        /// The position of the original negation operator. The error position
+        /// points to the duplicate negation operator.
+        original: Span,
+    },
+    /// Expected a flag but got EOF, e.g., `(?`.
+    FlagUnexpectedEof,
+    /// Unrecognized flag, e.g., `a`.
+    FlagUnrecognized,
+    /// A duplicate capture name was found.
+    GroupNameDuplicate {
+        /// The position of the initial occurrence of the capture name. The
+        /// error position itself points to the duplicate occurrence.
+        original: Span,
+    },
+    /// A capture group name is empty, e.g., `(?P<>abc)`.
+    GroupNameEmpty,
+    /// An invalid character was seen for a capture group name. This includes
+    /// errors where the first character is a digit (even though subsequent
+    /// characters are allowed to be digits).
+    GroupNameInvalid,
+    /// A closing `>` could not be found for a capture group name.
+    GroupNameUnexpectedEof,
+    /// An unclosed group, e.g., `(ab`.
+    ///
+    /// The span of this error corresponds to the unclosed parenthesis.
+    GroupUnclosed,
+    /// An unopened group, e.g., `ab)`.
+    GroupUnopened,
+    /// The nest limit was exceeded. The limit stored here is the limit
+    /// configured in the parser.
+    NestLimitExceeded(u32),
+    /// The range provided in a counted repetition operator is invalid. The
+    /// range is invalid if the start is greater than the end.
+    RepetitionCountInvalid,
+    /// An opening `{` was found with no corresponding closing `}`.
+    RepetitionCountUnclosed,
+    /// A repetition operator was applied to a missing sub-expression. This
+    /// occurs, for example, in the regex consisting of just a `*`. It is,
+    /// however, possible to create a repetition operating on an empty
+    /// sub-expression. For example, `()*` is still considered valid.
+    RepetitionMissing,
+    /// When octal support is disabled, this error is produced when an octal
+    /// escape is used. The octal escape is assumed to be an invocation of
+    /// a backreference, which is the common case.
+    UnsupportedBackreference,
+    /// When syntax similar to PCRE's look-around is used, this error is
+    /// returned. Some example syntaxes that are rejected include, but are
+    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
+    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
+    /// error is used to improve the user experience.
+    UnsupportedLookAround,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match self.kind {
+            CaptureLimitExceeded => "capture group limit exceeded",
+            ClassEscapeInvalid => "invalid escape sequence in character class",
+            ClassRangeInvalid => "invalid character class range",
+            ClassUnclosed => "unclosed character class",
+            DecimalEmpty => "empty decimal literal",
+            DecimalInvalid => "invalid decimal literal",
+            EscapeHexEmpty => "empty hexadecimal literal",
+            EscapeHexInvalid => "invalid hexadecimal literal",
+            EscapeHexInvalidDigit => "invalid hexadecimal digit",
+            EscapeUnexpectedEof => "unexpected eof (escape sequence)",
+            EscapeUnrecognized => "unrecognized escape sequence",
+            FlagDanglingNegation => "dangling flag negation operator",
+            FlagDuplicate{..} => "duplicate flag",
+            FlagRepeatedNegation{..} => "repeated negation",
+            FlagUnexpectedEof => "unexpected eof (flag)",
+            FlagUnrecognized => "unrecognized flag",
+            GroupNameDuplicate{..} => "duplicate capture group name",
+            GroupNameEmpty => "empty capture group name",
+            GroupNameInvalid => "invalid capture group name",
+            GroupNameUnexpectedEof => "unclosed capture group name",
+            GroupUnclosed => "unclosed group",
+            GroupUnopened => "unopened group",
+            NestLimitExceeded(_) => "nest limit exceeded",
+            RepetitionCountInvalid => "invalid repetition count range",
+            RepetitionCountUnclosed => "unclosed counted repetition",
+            RepetitionMissing => "repetition operator missing expression",
+            UnsupportedBackreference => "backreferences are not supported",
+            UnsupportedLookAround => "look-around is not supported",
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::ErrorKind::*;
+        match *self {
+            CaptureLimitExceeded => {
+                write!(f, "exceeded the maximum number of \
+                           capturing groups ({})", ::std::u32::MAX)
+            }
+            ClassEscapeInvalid => {
+                write!(f, "invalid escape sequence found in character class")
+            }
+            ClassRangeInvalid => {
+                write!(f, "invalid character class range, \
+                           the start must be <= the end")
+            }
+            ClassUnclosed => {
+                write!(f, "unclosed character class")
+            }
+            DecimalEmpty => {
+                write!(f, "decimal literal empty")
+            }
+            DecimalInvalid => {
+                write!(f, "decimal literal invalid")
+            }
+            EscapeHexEmpty => {
+                write!(f, "hexadecimal literal empty")
+            }
+            EscapeHexInvalid => {
+                write!(f, "hexadecimal literal is not a Unicode scalar value")
+            }
+            EscapeHexInvalidDigit => {
+                write!(f, "invalid hexadecimal digit")
+            }
+            EscapeUnexpectedEof => {
+                write!(f, "incomplete escape sequence, \
+                           reached end of pattern prematurely")
+            }
+            EscapeUnrecognized => {
+                write!(f, "unrecognized escape sequence")
+            }
+            FlagDanglingNegation => {
+                write!(f, "dangling flag negation operator")
+            }
+            FlagDuplicate{..} => {
+                write!(f, "duplicate flag")
+            }
+            FlagRepeatedNegation{..} => {
+                write!(f, "flag negation operator repeated")
+            }
+            FlagUnexpectedEof => {
+                write!(f, "expected flag but got end of regex")
+            }
+            FlagUnrecognized => {
+                write!(f, "unrecognized flag")
+            }
+            GroupNameDuplicate{..} => {
+                write!(f, "duplicate capture group name")
+            }
+            GroupNameEmpty => {
+                write!(f, "empty capture group name")
+            }
+            GroupNameInvalid => {
+                write!(f, "invalid capture group character")
+            }
+            GroupNameUnexpectedEof => {
+                write!(f, "unclosed capture group name")
+            }
+            GroupUnclosed => {
+                write!(f, "unclosed group")
+            }
+            GroupUnopened => {
+                write!(f, "unopened group")
+            }
+            NestLimitExceeded(limit) => {
+                write!(f, "exceeded the maximum number of \
+                           nested parentheses/brackets ({})", limit)
+            }
+            RepetitionCountInvalid => {
+                write!(f, "invalid repetition count range, \
+                           the start must be <= the end")
+            }
+            RepetitionCountUnclosed => {
+                write!(f, "unclosed counted repetition")
+            }
+            RepetitionMissing => {
+                write!(f, "repetition operator missing expression")
+            }
+            UnsupportedBackreference => {
+                write!(f, "backreferences are not supported")
+            }
+            UnsupportedLookAround => {
+                write!(f, "look-around, including look-ahead and look-behind, \
+                           is not supported")
+            }
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// Span represents the position information of a single AST item.
+///
+/// All span positions are absolute byte offsets that can be used on the
+/// original regular expression that was parsed.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Span {
+    /// The start byte offset.
+    pub start: Position,
+    /// The end byte offset.
+    pub end: Position,
+}
+
+impl fmt::Debug for Span {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Span({:?}, {:?})", self.start, self.end)
+    }
+}
+
+impl Ord for Span {
+    fn cmp(&self, other: &Span) -> Ordering {
+        (&self.start, &self.end).cmp(&(&other.start, &other.end))
+    }
+}
+
+impl PartialOrd for Span {
+    fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// A single position in a regular expression.
+///
+/// A position encodes one half of a span, and includes the byte offset, line
+/// number and column number.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Position {
+    /// The absolute offset of this position, starting at `0` from the
+    /// beginning of the regular expression pattern string.
+    pub offset: usize,
+    /// The line number, starting at `1`.
+    pub line: usize,
+    /// The approximate column number, starting at `1`.
+    pub column: usize,
+}
+
+impl fmt::Debug for Position {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Position(o: {:?}, l: {:?}, c: {:?})",
+            self.offset, self.line, self.column)
+    }
+}
+
+impl Ord for Position {
+    fn cmp(&self, other: &Position) -> Ordering {
+        self.offset.cmp(&other.offset)
+    }
+}
+
+impl PartialOrd for Position {
+    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Span {
+    /// Create a new span with the given positions.
+    pub fn new(start: Position, end: Position) -> Span {
+        Span { start: start, end: end }
+    }
+
+    /// Create a new span using the given position as the start and end.
+    pub fn splat(pos: Position) -> Span {
+        Span::new(pos, pos)
+    }
+
+    /// Create a new span by replacing the starting position with the one
+    /// given.
+    pub fn with_start(self, pos: Position) -> Span {
+        Span { start: pos, ..self }
+    }
+
+    /// Create a new span by replacing the ending position with the one
+    /// given.
+    pub fn with_end(self, pos: Position) -> Span {
+        Span { end: pos, ..self }
+    }
+
+    /// Returns true if and only if this span occurs on a single line.
+    pub fn is_one_line(&self) -> bool {
+        self.start.line == self.end.line
+    }
+
+    /// Returns true if and only if this span is empty. That is, it points to
+    /// a single position in the concrete syntax of a regular expression.
+    pub fn is_empty(&self) -> bool {
+        self.start.offset == self.end.offset
+    }
+}
+
+impl Position {
+    /// Create a new position with the given information.
+    ///
+    /// `offset` is the absolute offset of the position, starting at `0` from
+    /// the beginning of the regular expression pattern string.
+    ///
+    /// `line` is the line number, starting at `1`.
+    ///
+    /// `column` is the approximate column number, starting at `1`.
+    pub fn new(offset: usize, line: usize, column: usize) -> Position {
+        Position { offset: offset, line: line, column: column }
+    }
+}
+
+/// An abstract syntax tree for a single regular expression along with any
+/// comments found.
+///
+/// Comments are not stored in the tree itself to avoid complexity. Each
+/// comment contains a span of precisely where it occurred in the original
+/// regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WithComments {
+    /// The actual ast.
+    pub ast: Ast,
+    /// All comments found in the original regular expression.
+    pub comments: Vec<Comment>,
+}
+
+/// A comment from a regular expression with an associated span.
+///
+/// A regular expression can only contain comments when the `x` flag is
+/// enabled.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Comment {
+    /// The span of this comment, including the beginning `#` and ending `\n`.
+    pub span: Span,
+    /// The comment text, starting with the first character following the `#`
+    /// and ending with the last character preceding the `\n`.
+    pub comment: String,
+}
+
+/// An abstract syntax tree for a single regular expression.
+///
+/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
+/// space proportional to the size of the `Ast`.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the `Ast`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Ast {
+    /// An empty regex that matches everything.
+    Empty(Span),
+    /// A set of flags, e.g., `(?is)`.
+    Flags(SetFlags),
+    /// A single character literal, which includes escape sequences.
+    Literal(Literal),
+    /// The "any character" class.
+    Dot(Span),
+    /// A single zero-width assertion.
+    Assertion(Assertion),
+    /// A single character class. This includes all forms of character classes
+    /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
+    Class(Class),
+    /// A repetition operator applied to an arbitrary regular expression.
+    Repetition(Repetition),
+    /// A grouped regular expression.
+    Group(Group),
+    /// An alternation of regular expressions.
+    Alternation(Alternation),
+    /// A concatenation of regular expressions.
+    Concat(Concat),
+}
+
+impl Ast {
+    /// Return the span of this abstract syntax tree.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Ast::Empty(ref span) => span,
+            Ast::Flags(ref x) => &x.span,
+            Ast::Literal(ref x) => &x.span,
+            Ast::Dot(ref span) => span,
+            Ast::Assertion(ref x) => &x.span,
+            Ast::Class(ref x) => x.span(),
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this Ast is empty.
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            Ast::Empty(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if this AST has any (including possibly empty)
+    /// subexpressions.
+    fn has_subexprs(&self) -> bool {
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_) => false,
+            Ast::Class(_)
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => true,
+        }
+    }
+}
+
+/// Print a display representation of this Ast.
+///
+/// This does not preserve any of the original whitespace formatting that may
+/// have originally been present in the concrete syntax from which this Ast
+/// was generated.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Ast`.
+impl fmt::Display for Ast {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use ast::print::Printer;
+        Printer::new().print(self, f)
+    }
+}
+
+/// An alternation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Alternation {
+    /// The span of this alternation.
+    pub span: Span,
+    /// The alternate regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Alternation {
+    /// Return this alternation as an AST.
+    ///
+    /// If this alternation contains zero ASTs, then Ast::Empty is
+    /// returned. If this alternation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Alternation(self),
+        }
+    }
+}
+
+/// A concatenation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Concat {
+    /// The span of this concatenation.
+    pub span: Span,
+    /// The regular expressions being concatenated, in order.
+    pub asts: Vec<Ast>,
+}
+
+impl Concat {
+    /// Return this concatenation as an AST.
+    ///
+    /// If this concatenation contains zero ASTs, then Ast::Empty is
+    /// returned. If this concatenation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Concat(self),
+        }
+    }
+}
+
+/// A single literal expression.
+///
+/// A literal corresponds to a single Unicode scalar value. Literals may be
+/// represented in their literal form, e.g., `a` or in their escaped form,
+/// e.g., `\x61`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Literal {
+    /// The span of this literal.
+    pub span: Span,
+    /// The kind of this literal.
+    pub kind: LiteralKind,
+    /// The Unicode scalar value corresponding to this literal.
+    pub c: char,
+}
+
+impl Literal {
+    /// If this literal was written as a fixed-width `\x` hex escape (e.g.,
+    /// `\x61`), this returns its byte value. Otherwise, this returns `None`.
+    pub fn byte(&self) -> Option<u8> {
+        let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
+        if self.c as u32 <= 255 && self.kind == short_hex {
+            Some(self.c as u8)
+        } else {
+            None
+        }
+    }
+}
+
+/// The kind of a single literal expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum LiteralKind {
+    /// The literal is written verbatim, e.g., `a` or `☃`.
+    Verbatim,
+    /// The literal is written as an escape because it is punctuation, e.g.,
+    /// `\*` or `\[`.
+    Punctuation,
+    /// The literal is written as an octal escape, e.g., `\141`.
+    Octal,
+    /// The literal is written as a hex code with a fixed number of digits
+    /// depending on the type of the escape, e.g., `\x61` or `\u0061` or
+    /// `\U00000061`.
+    HexFixed(HexLiteralKind),
+    /// The literal is written as a hex code with a bracketed number of
+    /// digits. The only restriction is that the bracketed hex code must refer
+    /// to a valid Unicode scalar value.
+    HexBrace(HexLiteralKind),
+    /// The literal is written as a specially recognized escape, e.g., `\f`
+    /// or `\n`.
+    Special(SpecialLiteralKind),
+}
+
+/// The type of a special literal.
+///
+/// A special literal is a special escape sequence recognized by the regex
+/// parser, e.g., `\f` or `\n`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum SpecialLiteralKind {
+    /// Bell, spelled `\a` (`\x07`).
+    Bell,
+    /// Form feed, spelled `\f` (`\x0C`).
+    FormFeed,
+    /// Tab, spelled `\t` (`\x09`).
+    Tab,
+    /// Line feed, spelled `\n` (`\x0A`).
+    LineFeed,
+    /// Carriage return, spelled `\r` (`\x0D`).
+    CarriageReturn,
+    /// Vertical tab, spelled `\v` (`\x0B`).
+    VerticalTab,
+    /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
+    /// parsing in verbose mode.
+    Space,
+}
+
+/// The type of a Unicode hex literal.
+///
+/// Note that all variants behave the same when used with brackets. They only
+/// differ when used without brackets in the number of hex digits that must
+/// follow.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HexLiteralKind {
+    /// A `\x` prefix. When used without brackets, this form is limited to
+    /// two digits.
+    X,
+    /// A `\u` prefix. When used without brackets, this form is limited to
+    /// four digits.
+    UnicodeShort,
+    /// A `\U` prefix. When used without brackets, this form is limited to
+    /// eight digits.
+    UnicodeLong,
+}
+
+impl HexLiteralKind {
+    /// The number of digits that must be used with this literal form when
+    /// used without brackets. When used with brackets, there is no
+    /// restriction on the number of digits.
+    pub fn digits(&self) -> u32 {
+        match *self {
+            HexLiteralKind::X => 2,
+            HexLiteralKind::UnicodeShort => 4,
+            HexLiteralKind::UnicodeLong => 8,
+        }
+    }
+}
+
+/// A single character class expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(ClassBracketed),
+}
+
+impl Class {
+    /// Return the span of this character class.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Class::Perl(ref x) => &x.span,
+            Class::Unicode(ref x) => &x.span,
+            Class::Bracketed(ref x) => &x.span,
+        }
+    }
+}
+
+/// A Perl character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassPerl {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of Perl class.
+    pub kind: ClassPerlKind,
+    /// Whether the class is negated or not. e.g., `\d` is not negated but
+    /// `\D` is.
+    pub negated: bool,
+}
+
+/// The available Perl character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassPerlKind {
+    /// Decimal numbers.
+    Digit,
+    /// Whitespace.
+    Space,
+    /// Word characters.
+    Word,
+}
+
+/// An ASCII character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassAscii {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of ASCII class.
+    pub kind: ClassAsciiKind,
+    /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
+    /// but `[[:^alpha:]]` is.
+    pub negated: bool,
+}
+
+/// The available ASCII character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassAsciiKind {
+    /// `[0-9A-Za-z]`
+    Alnum,
+    /// `[A-Za-z]`
+    Alpha,
+    /// `[\x00-\x7F]`
+    Ascii,
+    /// `[ \t]`
+    Blank,
+    /// `[\x00-\x1F\x7F]`
+    Cntrl,
+    /// `[0-9]`
+    Digit,
+    /// `[!-~]`
+    Graph,
+    /// `[a-z]`
+    Lower,
+    /// `[ -~]`
+    Print,
+    /// `[!-/:-@\[-`{-~]`
+    Punct,
+    /// `[\t\n\v\f\r ]`
+    Space,
+    /// `[A-Z]`
+    Upper,
+    /// `[0-9A-Za-z_]`
+    Word,
+    /// `[0-9A-Fa-f]`
+    Xdigit,
+}
+
+impl ClassAsciiKind {
+    /// Return the corresponding ClassAsciiKind variant for the given name.
+    ///
+    /// The name given should correspond to the lowercase version of the
+    /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
+    ///
+    /// If no variant with the corresponding name exists, then `None` is
+    /// returned.
+    pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
+        use self::ClassAsciiKind::*;
+        match name {
+            "alnum" => Some(Alnum),
+            "alpha" => Some(Alpha),
+            "ascii" => Some(Ascii),
+            "blank" => Some(Blank),
+            "cntrl" => Some(Cntrl),
+            "digit" => Some(Digit),
+            "graph" => Some(Graph),
+            "lower" => Some(Lower),
+            "print" => Some(Print),
+            "punct" => Some(Punct),
+            "space" => Some(Space),
+            "upper" => Some(Upper),
+            "word" => Some(Word),
+            "xdigit" => Some(Xdigit),
+            _ => None,
+        }
+    }
+}
+
+/// A Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not.
+    ///
+    /// Note: be careful when using this attribute. This specifically refers
+    /// to whether the class is written as `\p` or `\P`, where the latter
+    /// is `negated = true`. However, it is also possible to write something
+    /// like `\P{scx!=Katakana}` which is actually equivalent to
+    /// `\p{scx=Katakana}` and is therefore not actually negated even though
+    /// `negated = true` here. To test whether this class is truly negated
+    /// or not, use the `is_negated` method.
+    pub negated: bool,
+    /// The kind of Unicode class.
+    pub kind: ClassUnicodeKind,
+}
+
+impl ClassUnicode {
+    /// Returns true if this class has been negated.
+    ///
+    /// Note that this takes the Unicode op into account, if it's present.
+    /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
+    pub fn is_negated(&self) -> bool {
+        match self.kind {
+            ClassUnicodeKind::NamedValue {
+                op: ClassUnicodeOpKind::NotEqual, ..
+            } => !self.negated,
+            _ => self.negated,
+        }
+    }
+}
+
+/// The available forms of Unicode character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeKind {
+    /// A one letter abbreviated class, e.g., `\pN`.
+    OneLetter(char),
+    /// A binary property, general category or script. The string may be
+    /// empty.
+    Named(String),
+    /// A property name and an associated value.
+    NamedValue {
+        /// The type of Unicode op used to associate `name` with `value`.
+        op: ClassUnicodeOpKind,
+        /// The property name (which may be empty).
+        name: String,
+        /// The property value (which may be empty).
+        value: String,
+    },
+}
+
+/// The type of op used in a Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeOpKind {
+    /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
+    Equal,
+    /// A property set to a specific value using a colon, e.g.,
+    /// `\p{scx:Katakana}`.
+    Colon,
+    /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
+    NotEqual,
+}
+
+impl ClassUnicodeOpKind {
+    /// Whether the op is an equality op or not.
+    pub fn is_equal(&self) -> bool {
+        match *self {
+            ClassUnicodeOpKind::Equal|ClassUnicodeOpKind::Colon => true,
+            _ => false,
+        }
+    }
+}
+
+/// A bracketed character class, e.g., `[a-z0-9]`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBracketed {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not. e.g., `[a]` is not negated but
+    /// `[^a]` is.
+    pub negated: bool,
+    /// The type of this set. A set is either a normal union of things, e.g.,
+    /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
+    pub kind: ClassSet,
+}
+
+/// A character class set.
+///
+/// This type corresponds to the internal structure of a bracketed character
+/// class. That is, every bracketed character class is one of two types: a
+/// union of items (literals, ranges, other bracketed classes) or a tree of
+/// binary set operations.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSet {
+    /// An item, which can be a single literal, range, nested character class
+    /// or a union of items.
+    Item(ClassSetItem),
+    /// A single binary operation (i.e., &&, -- or ~~).
+    BinaryOp(ClassSetBinaryOp),
+}
+
+impl ClassSet {
+    /// Build a set from a union.
+    pub fn union(ast: ClassSetUnion) -> ClassSet {
+        ClassSet::Item(ClassSetItem::Union(ast))
+    }
+
+    /// Return the span of this character class set.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSet::Item(ref x) => x.span(),
+            ClassSet::BinaryOp(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this class set is empty.
+    fn is_empty(&self) -> bool {
+        match *self {
+            ClassSet::Item(ClassSetItem::Empty(_)) => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single component of a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSetItem {
+    /// An empty item.
+    ///
+    /// Note that a bracketed character class cannot contain a single empty
+    /// item. Empty items can appear when using one of the binary operators.
+    /// For example, `[&&]` is the intersection of two empty classes.
+    Empty(Span),
+    /// A single literal.
+    Literal(Literal),
+    /// A range between two literals.
+    Range(ClassSetRange),
+    /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
+    Ascii(ClassAscii),
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(Box<ClassBracketed>),
+    /// A union of items.
+    Union(ClassSetUnion),
+}
+
+impl ClassSetItem {
+    /// Return the span of this character class set item.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSetItem::Empty(ref span) => span,
+            ClassSetItem::Literal(ref x) => &x.span,
+            ClassSetItem::Range(ref x) => &x.span,
+            ClassSetItem::Ascii(ref x) => &x.span,
+            ClassSetItem::Perl(ref x) => &x.span,
+            ClassSetItem::Unicode(ref x) => &x.span,
+            ClassSetItem::Bracketed(ref x) => &x.span,
+            ClassSetItem::Union(ref x) => &x.span,
+        }
+    }
+}
+
+/// A single character class range in a set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetRange {
+    /// The span of this range.
+    pub span: Span,
+    /// The start of this range.
+    pub start: Literal,
+    /// The end of this range.
+    pub end: Literal,
+}
+
+impl ClassSetRange {
+    /// Returns true if and only if this character class range is valid.
+    ///
+    /// The only case where a range is invalid is if its start is greater than
+    /// its end.
+    pub fn is_valid(&self) -> bool {
+        self.start.c <= self.end.c
+    }
+}
+
+/// A union of items inside a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetUnion {
+    /// The span of the items in this operation. e.g., the `a-z0-9` in
+    /// `[^a-z0-9]`
+    pub span: Span,
+    /// The sequence of items that make up this union.
+    pub items: Vec<ClassSetItem>,
+}
+
+impl ClassSetUnion {
+    /// Push a new item in this union.
+    ///
+    /// The ending position of this union's span is updated to the ending
+    /// position of the span of the item given. If the union is empty, then
+    /// the starting position of this union is set to the starting position
+    /// of this item.
+    ///
+    /// In other words, if you only use this method to add items to a union
+    /// and you set the spans on each item correctly, then you should never
+    /// need to adjust the span of the union directly.
+    pub fn push(&mut self, item: ClassSetItem) {
+        if self.items.is_empty() {
+            self.span.start = item.span().start;
+        }
+        self.span.end = item.span().end;
+        self.items.push(item);
+    }
+
+    /// Return this union as a character class set item.
+    ///
+    /// If this union contains zero items, then `ClassSetItem::Empty` is
+    /// returned. If this union contains exactly 1 item, then the
+    /// corresponding item is returned. Otherwise, `ClassSetItem::Union` is
+    /// returned.
+    pub fn into_item(mut self) -> ClassSetItem {
+        match self.items.len() {
+            0 => ClassSetItem::Empty(self.span),
+            1 => self.items.pop().unwrap(),
+            _ => ClassSetItem::Union(self),
+        }
+    }
+}
+
+/// A Unicode character class set operation.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetBinaryOp {
+    /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--[h-p]]`.
+    pub span: Span,
+    /// The type of this set operation.
+    pub kind: ClassSetBinaryOpKind,
+    /// The left hand side of the operation.
+    pub lhs: Box<ClassSet>,
+    /// The right hand side of the operation.
+    pub rhs: Box<ClassSet>,
+}
+
+/// The type of a Unicode character class set operation.
+///
+/// Note that this doesn't explicitly represent union since there is no
+/// explicit union operator. Concatenation inside a character class corresponds
+/// to the union operation.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum ClassSetBinaryOpKind {
+    /// The intersection of two sets, e.g., `\pN&&[a-z]`.
+    Intersection,
+    /// The difference of two sets, e.g., `\pN--[0-9]`.
+    Difference,
+    /// The symmetric difference of two sets. The symmetric difference is the
+    /// set of elements belonging to one but not both sets.
+    /// e.g., `[\pL~~[:ascii:]]`.
+    SymmetricDifference,
+}
+
+/// A single zero-width assertion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Assertion {
+    /// The span of this assertion.
+    pub span: Span,
+    /// The assertion kind, e.g., `\b` or `^`.
+    pub kind: AssertionKind,
+}
+
+/// An assertion kind.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum AssertionKind {
+    /// `^`
+    StartLine,
+    /// `$`
+    EndLine,
+    /// `\A`
+    StartText,
+    /// `\z`
+    EndText,
+    /// `\b`
+    WordBoundary,
+    /// `\B`
+    NotWordBoundary,
+}
+
+/// A repetition operation applied to a regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The span of this operation.
+    pub span: Span,
+    /// The actual operation.
+    pub op: RepetitionOp,
+    /// Whether this operation was applied greedily or not.
+    pub greedy: bool,
+    /// The regular expression under repetition.
+    pub ast: Box<Ast>,
+}
+
+/// The repetition operator itself.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct RepetitionOp {
+    /// The span of this operator. This includes things like `+`, `*?` and
+    /// `{m,n}`.
+    pub span: Span,
+    /// The type of operation.
+    pub kind: RepetitionKind,
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// `?`
+    ZeroOrOne,
+    /// `*`
+    ZeroOrMore,
+    /// `+`
+    OneOrMore,
+    /// `{m,n}`
+    Range(RepetitionRange),
+}
+
+/// A range repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// `{m}`
+    Exactly(u32),
+    /// `{m,}`
+    AtLeast(u32),
+    /// `{m,n}`
+    Bounded(u32, u32),
+}
+
+impl RepetitionRange {
+    /// Returns true if and only if this repetition range is valid.
+    ///
+    /// The only case where a repetition range is invalid is if it is bounded
+    /// and its start is greater than its end.
+    pub fn is_valid(&self) -> bool {
+        match *self {
+            RepetitionRange::Bounded(s, e) if s > e => false,
+            _ => true,
+        }
+    }
+}
+
+/// A grouped regular expression.
+///
+/// This includes both capturing and non-capturing groups. This does **not**
+/// include flag-only groups like `(?is)`, but does contain any group that
+/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
+/// `(?is:a)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The span of this group.
+    pub span: Span,
+    /// The kind of this group.
+    pub kind: GroupKind,
+    /// The regular expression in this group.
+    pub ast: Box<Ast>,
+}
+
+impl Group {
+    /// If this group is non-capturing, then this returns the (possibly empty)
+    /// set of flags. Otherwise, `None` is returned.
+    pub fn flags(&self) -> Option<&Flags> {
+        match self.kind {
+            GroupKind::NonCapturing(ref flags) => Some(flags),
+            _ => None,
+        }
+    }
+
+    /// Returns true if and only if this group is capturing.
+    pub fn is_capturing(&self) -> bool {
+        match self.kind {
+            GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
+            GroupKind::NonCapturing(_) => false,
+        }
+    }
+
+    /// Returns the capture index of this group, if this is a capturing group.
+    ///
+    /// This returns a capture index precisely when `is_capturing` is `true`.
+    pub fn capture_index(&self) -> Option<u32> {
+        match self.kind {
+            GroupKind::CaptureIndex(i) => Some(i),
+            GroupKind::CaptureName(ref x) => Some(x.index),
+            GroupKind::NonCapturing(_) => None,
+        }
+    }
+}
+
+/// The kind of a group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// `(a)`
+    CaptureIndex(u32),
+    /// `(?P<name>a)`
+    CaptureName(CaptureName),
+    /// `(?:a)` and `(?i:a)`
+    NonCapturing(Flags),
+}
+
+/// A capture name.
+///
+/// This corresponds to the name itself between the angle brackets in, e.g.,
+/// `(?P<foo>expr)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct CaptureName {
+    /// The span of this capture name.
+    pub span: Span,
+    /// The capture name.
+    pub name: String,
+    /// The capture index.
+    pub index: u32,
+}
+
+/// A group of flags that is not applied to a particular regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SetFlags {
+    /// The span of these flags, including the grouping parentheses.
+    pub span: Span,
+    /// The actual sequence of flags.
+    pub flags: Flags,
+}
+
+/// A group of flags.
+///
+/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Flags {
+    /// The span of this group of flags.
+    pub span: Span,
+    /// A sequence of flag items. Each item is either a flag or a negation
+    /// operator.
+    pub items: Vec<FlagsItem>,
+}
+
+impl Flags {
+    /// Add the given item to this sequence of flags.
+    ///
+    /// If the item was added successfully, then `None` is returned. If the
+    /// given item is a duplicate, then `Some(i)` is returned, where
+    /// `items[i].kind == item.kind`.
+    pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
+        for (i, x) in self.items.iter().enumerate() {
+            if x.kind == item.kind {
+                return Some(i);
+            }
+        }
+        self.items.push(item);
+        None
+    }
+
+    /// Returns the state of the given flag in this set.
+    ///
+    /// If the given flag is in the set but is negated, then `Some(false)` is
+    /// returned.
+    ///
+    /// If the given flag is in the set and is not negated, then `Some(true)`
+    /// is returned.
+    ///
+    /// Otherwise, `None` is returned.
+    pub fn flag_state(&self, flag: Flag) -> Option<bool> {
+        let mut negated = false;
+        for x in &self.items {
+            match x.kind {
+                FlagsItemKind::Negation => {
+                    negated = true;
+                }
+                FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
+                    return Some(!negated);
+                }
+                _ => {}
+            }
+        }
+        None
+    }
+}
+
+/// A single item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FlagsItem {
+    /// The span of this item.
+    pub span: Span,
+    /// The kind of this item.
+    pub kind: FlagsItemKind,
+}
+
+/// The kind of an item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum FlagsItemKind {
+    /// A negation operator applied to all subsequent flags in the enclosing
+    /// group.
+    Negation,
+    /// A single flag in a group.
+    Flag(Flag),
+}
+
+impl FlagsItemKind {
+    /// Returns true if and only if this item is a negation operator.
+    pub fn is_negation(&self) -> bool {
+        match *self {
+            FlagsItemKind::Negation => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single flag.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Flag {
+    /// `i`
+    CaseInsensitive,
+    /// `m`
+    MultiLine,
+    /// `s`
+    DotMatchesNewLine,
+    /// `U`
+    SwapGreed,
+    /// `u`
+    Unicode,
+    /// `x`
+    IgnoreWhitespace,
+}
+
+/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
+/// space but heap space proportional to the depth of the `Ast`.
+impl Drop for Ast {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            // Classes are recursive, so they get their own Drop impl.
+            | Ast::Class(_) => return,
+            Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Group(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Alternation(ref x) if x.asts.is_empty() => return,
+            Ast::Concat(ref x) if x.asts.is_empty() => return,
+            _ => {}
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_ast = || Ast::Empty(empty_span());
+        let mut stack = vec![mem::replace(self, empty_ast())];
+        while let Some(mut ast) = stack.pop() {
+            match ast {
+                Ast::Empty(_)
+                | Ast::Flags(_)
+                | Ast::Literal(_)
+                | Ast::Dot(_)
+                | Ast::Assertion(_)
+                // Classes are recursive, so they get their own Drop impl.
+                | Ast::Class(_) => {}
+                Ast::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Alternation(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+                Ast::Concat(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
+/// stack space but heap space proportional to the depth of the `ClassSet`.
+impl Drop for ClassSet {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            ClassSet::Item(ref item) => {
+                match *item {
+                    ClassSetItem::Empty(_)
+                    | ClassSetItem::Literal(_)
+                    | ClassSetItem::Range(_)
+                    | ClassSetItem::Ascii(_)
+                    | ClassSetItem::Unicode(_)
+                    | ClassSetItem::Perl(_) => return,
+                    ClassSetItem::Bracketed(ref x) => {
+                        if x.kind.is_empty() {
+                            return;
+                        }
+                    }
+                    ClassSetItem::Union(ref x) => {
+                        if x.items.is_empty() {
+                            return;
+                        }
+                    }
+                }
+            }
+            ClassSet::BinaryOp(ref op) => {
+                if op.lhs.is_empty() && op.rhs.is_empty() {
+                    return;
+                }
+            }
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
+        let mut stack = vec![mem::replace(self, empty_set())];
+        while let Some(mut set) = stack.pop() {
+            match set {
+                ClassSet::Item(ref mut item) => {
+                    match *item {
+                        ClassSetItem::Empty(_)
+                        | ClassSetItem::Literal(_)
+                        | ClassSetItem::Range(_)
+                        | ClassSetItem::Ascii(_)
+                        | ClassSetItem::Unicode(_)
+                        | ClassSetItem::Perl(_) => {}
+                        ClassSetItem::Bracketed(ref mut x) => {
+                            stack.push(mem::replace(&mut x.kind, empty_set()));
+                        }
+                        ClassSetItem::Union(ref mut x) => {
+                            stack.extend(
+                                x.items.drain(..).map(ClassSet::Item));
+                        }
+                    }
+                }
+                ClassSet::BinaryOp(ref mut op) => {
+                    stack.push(mem::replace(&mut op.lhs, empty_set()));
+                    stack.push(mem::replace(&mut op.rhs, empty_set()));
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Ast can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let span = || Span::splat(Position::new(0, 0, 0));
+            let mut ast = Ast::Empty(span());
+            for i in 0..200 {
+                ast = Ast::Group(Group {
+                    span: span(),
+                    kind: GroupKind::CaptureIndex(i),
+                    ast: Box::new(ast),
+                });
+            }
+            assert!(!ast.is_empty());
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1<<10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+}
diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs
new file mode 100644
index 0000000000..a2f53d21b9
--- /dev/null
+++ b/regex-syntax/src/ast/parse.rs
@@ -0,0 +1,5257 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This module provides a regular expression parser.
+*/
+
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::mem;
+use std::result;
+
+use ast::{self, Ast, Position, Span};
+use either::Either;
+
+use is_meta_character;
+
+type Result<T> = result::Result<T, ast::Error>;
+
+/// A primitive is an expression with no sub-expressions. This includes
+/// literals, assertions and non-set character classes. This representation
+/// is used as intermediate state in the parser.
+///
+/// This does not include ASCII character classes, since they can only appear
+/// within a set character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Primitive {
+    Literal(ast::Literal),
+    Assertion(ast::Assertion),
+    Dot(Span),
+    Perl(ast::ClassPerl),
+    Unicode(ast::ClassUnicode),
+}
+
+impl Primitive {
+    /// Return the span of this primitive.
+    fn span(&self) -> &Span {
+        match *self {
+            Primitive::Literal(ref x) => &x.span,
+            Primitive::Assertion(ref x) => &x.span,
+            Primitive::Dot(ref span) => span,
+            Primitive::Perl(ref x) => &x.span,
+            Primitive::Unicode(ref x) => &x.span,
+        }
+    }
+
+    /// Convert this primitive into a proper AST.
+    fn into_ast(self) -> Ast {
+        match self {
+            Primitive::Literal(lit) => Ast::Literal(lit),
+            Primitive::Assertion(assert) => Ast::Assertion(assert),
+            Primitive::Dot(span) => Ast::Dot(span),
+            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
+            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
+        }
+    }
+
+    /// Convert this primitive into an item in a character class.
+    ///
+    /// If this primitive is not a legal item (i.e., an assertion or a dot),
+    /// then return an error.
+    fn into_class_set_item<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::ClassSetItem> {
+        use ast::ClassSetItem;
+        use self::Primitive::*;
+
+        match self {
+            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
+            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
+            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
+        }
+    }
+
+    /// Convert this primitive into a literal in a character class. In
+    /// particular, literals are the only valid items that can appear in
+    /// ranges.
+    ///
+    /// If this primitive is not a legal item (i.e., a class, assertion or a
+    /// dot), then return an error.
+    fn into_class_literal<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::Literal> {
+        use self::Primitive::*;
+
+        match self {
+            Literal(lit) => Ok(lit),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
+        }
+    }
+}
+
+/// Returns true if the given character is a hexadecimal digit.
+fn is_hex(c: char) -> bool {
+    ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+}
+
+/// Returns true if the given character is valid in a capture group name.
+///
+/// If `first` is true, then `c` is treated as the first character in the
+/// group name (which is not allowed to be a digit).
+fn is_capture_char(c: char, first: bool) -> bool {
+    c == '_' || (!first && c >= '0' && c <= '9')
+    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
+#[derive(Clone, Debug)]
+pub struct ParserBuilder {
+    ignore_whitespace: bool,
+    nest_limit: u32,
+    octal: bool,
+}
+
+impl Default for ParserBuilder {
+    fn default() -> ParserBuilder {
+        ParserBuilder::new()
+    }
+}
+
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder {
+            ignore_whitespace: false,
+            nest_limit: 250,
+            octal: false,
+        }
+    }
+
+    /// Build a parser from this configuration.
+    pub fn build(&self) -> Parser {
+        Parser {
+            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
+            capture_index: Cell::new(0),
+            nest_limit: self.nest_limit,
+            octal: self.octal,
+            initial_ignore_whitespace: self.ignore_whitespace,
+            ignore_whitespace: Cell::new(self.ignore_whitespace),
+            comments: RefCell::new(vec![]),
+            stack_group: RefCell::new(vec![]),
+            stack_class: RefCell::new(vec![]),
+            capture_names: RefCell::new(vec![]),
+            scratch: RefCell::new(String::new()),
+        }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax, therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.nest_limit = limit;
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.octal = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ignore_whitespace = yes;
+        self
+    }
+}
+
+/// A regular expression parser.
+///
+/// This parses a string representation of a regular expression into an
+/// abstract syntax tree. The size of the tree is proportional to the length
+/// of the regular expression pattern.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    /// The current position of the parser.
+    pos: Cell<Position>,
+    /// The current capture index.
+    capture_index: Cell<u32>,
+    /// The maximum number of open parens/brackets allowed. If the parser
+    /// exceeds this number, then an error is returned.
+    nest_limit: u32,
+    /// Whether to support octal syntax or not. When `false`, the parser will
+    /// return an error helpfully pointing out that backreferences are not
+    /// supported.
+    octal: bool,
+    /// The initial setting for `ignore_whitespace` as provided by
+    /// `ParserBuilder`. This is used when resetting the parser's state.
+    initial_ignore_whitespace: bool,
+    /// Whether whitespace should be ignored. When enabled, comments are
+    /// also permitted.
+    ignore_whitespace: Cell<bool>,
+    /// A list of comments, in order of appearance.
+    comments: RefCell<Vec<ast::Comment>>,
+    /// A stack of grouped sub-expressions, including alternations.
+    stack_group: RefCell<Vec<GroupState>>,
+    /// A stack of nested character classes. This is only non-empty when
+    /// parsing a class.
+    stack_class: RefCell<Vec<ClassState>>,
+    /// A sorted sequence of capture names. This is used to detect duplicate
+    /// capture names and report an error if one is detected.
+    capture_names: RefCell<Vec<ast::CaptureName>>,
+    /// A scratch buffer used in various places. Mostly this is used to
+    /// accumulate relevant characters from parts of a pattern.
+    scratch: RefCell<String>,
+}
+
+/// ParserI is the internal parser implementation.
+///
+/// We use this separate type so that we can carry the provided pattern string
+/// along with us. In particular, a `Parser` internal state is not tied to any
+/// one pattern, but `ParserI` is.
+///
+/// This type also lets us use `ParserI<&Parser>` in production code while
+/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
+/// work against the internal interface of the parser.
+#[derive(Clone, Debug)]
+struct ParserI<'s, P> {
+    /// The parser state/configuration.
+    parser: P,
+    /// The full regular expression provided by the user.
+    pattern: &'s str,
+}
+
+/// GroupState represents a single stack frame while parsing nested groups
+/// and alternations. Each frame records the state up to an opening parenthesis
+/// or an alternation operator `|`.
+#[derive(Clone, Debug)]
+enum GroupState {
+    /// This state is pushed whenever an opening group is found.
+    Group {
+        /// The concatenation immediately preceding the opening group.
+        concat: ast::Concat,
+        /// The group that has been opened. Its sub-AST is always empty.
+        group: ast::Group,
+        /// Whether this group has the `x` flag enabled or not.
+        ignore_whitespace: bool,
+    },
+    /// This state is pushed whenever a new alternation branch is found. If
+    /// an alternation branch is found and this state is at the top of the
+    /// stack, then this state should be modified to include the new
+    /// alternation.
+    Alternation(ast::Alternation),
+}
+
+/// ClassState represents a single stack frame while parsing character classes.
+/// Each frame records the state up to an intersection, difference, symmetric
+/// difference or nested class.
+///
+/// Note that a parser's character class stack is only non-empty when parsing
+/// a character class. In all other cases, it is empty.
+#[derive(Clone, Debug)]
+enum ClassState {
+    /// This state is pushed whenever an opening bracket is found.
+    Open {
+        /// The union of class items immediately preceding this class.
+        union: ast::ClassSetUnion,
+        /// The class that has been opened. Typically this just corresponds
+        /// to the `[`, but it can also include `[^` since `^` indicates
+        /// negation of the class.
+        set: ast::ClassBracketed,
+    },
+    /// This state is pushed when an operator is seen. When popped, the stored
+    /// set becomes the left hand side of the operator.
+    Op {
+        /// The type of the operation, i.e., &&, -- or ~~.
+        kind: ast::ClassSetBinaryOpKind,
+        /// The left-hand side of the operator.
+        lhs: ast::ClassSet,
+    },
+}
+
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with either the `parse` or `parse_with_comments`
+    /// methods. The parse methods return an abstract syntax tree.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into an abstract syntax tree.
+    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
+        ParserI::new(self, pattern).parse()
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    pub fn parse_with_comments(
+        &mut self,
+        pattern: &str,
+    ) -> Result<ast::WithComments> {
+        ParserI::new(self, pattern).parse_with_comments()
+    }
+
+    /// Reset the internal state of a parser.
+    ///
+    /// This is called at the beginning of every parse. This prevents the
+    /// parser from running with inconsistent state (say, if a previous
+    /// invocation returned an error and the parser is reused).
+    fn reset(&self) {
+        // Should mirror `ParserBuilder::build`. NOTE(review): capture_index,
+        // capture_names and scratch are not reset here; confirm intended.
+        self.pos.set(Position { offset: 0, line: 1, column: 1});
+        self.ignore_whitespace.set(self.initial_ignore_whitespace);
+        self.comments.borrow_mut().clear();
+        self.stack_group.borrow_mut().clear();
+        self.stack_class.borrow_mut().clear();
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Build an internal parser from a parser configuration and a pattern.
+    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
+        ParserI { parser: parser, pattern: pattern }
+    }
+
+    /// Return a reference to the parser state.
+    fn parser(&self) -> &Parser {
+        self.parser.borrow()
+    }
+
+    /// Return a reference to the pattern being parsed.
+    fn pattern(&self) -> &str {
+        self.pattern.borrow()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
+        ast::Error {
+            kind: kind,
+            pattern: self.pattern().to_string(),
+            span: span,
+        }
+    }
+
+    /// Return the current offset of the parser.
+    ///
+    /// The offset starts at `0` from the beginning of the regular expression
+    /// pattern string.
+    fn offset(&self) -> usize {
+        self.parser().pos.get().offset
+    }
+
+    /// Return the current line number of the parser.
+    ///
+    /// The line number starts at `1`.
+    fn line(&self) -> usize {
+        self.parser().pos.get().line
+    }
+
+    /// Return the current column of the parser.
+    ///
+    /// The column number starts at `1` and is reset whenever a `\n` is seen.
+    fn column(&self) -> usize {
+        self.parser().pos.get().column
+    }
+
+    /// Return the next capturing index. Each subsequent call increments the
+    /// internal index.
+    ///
+    /// The span given should correspond to the location of the opening
+    /// parenthesis.
+    ///
+    /// If the capture limit is exceeded, then an error is returned.
+    fn next_capture_index(&self, span: Span) -> Result<u32> {
+        let current = self.parser().capture_index.get();
+        let i = try!(current.checked_add(1).ok_or_else(|| {
+            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
+        }));
+        self.parser().capture_index.set(i);
+        Ok(i)
+    }
+
+    /// Adds the given capture name to this parser. If this capture name has
+    /// already been used, then an error is returned.
+    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
+        let mut names = self.parser().capture_names.borrow_mut();
+        match names.binary_search_by_key(
+            &cap.name.as_str(),
+            |c| c.name.as_str(),
+        ) {
+            Err(i) => {
+                names.insert(i, cap.clone());
+                Ok(())
+            }
+            Ok(i) => {
+                Err(self.error(cap.span, ast::ErrorKind::GroupNameDuplicate {
+                    original: names[i].span,
+                }))
+            }
+        }
+    }
+
+    /// Return whether the parser should ignore whitespace or not.
+    fn ignore_whitespace(&self) -> bool {
+        self.parser().ignore_whitespace.get()
+    }
+
+    /// Return the character at the current position of the parser.
+    ///
+    /// This panics if the current position does not point to a valid char.
+    fn char(&self) -> char {
+        self.char_at(self.offset())
+    }
+
+    /// Return the character at the given position.
+    ///
+    /// This panics if the given position does not point to a valid char.
+    fn char_at(&self, i: usize) -> char {
+        self.pattern()[i..].chars().next()
+            .unwrap_or_else(|| {
+                panic!("expected char at offset {}", i)
+            })
+    }
+
+    /// Bump the parser to the next Unicode scalar value.
+    ///
+    /// If the end of the input has been reached, then `false` is returned.
+    fn bump(&self) -> bool {
+        if self.is_eof() {
+            return false;
+        }
+        let here = self.pos();
+        let c = self.char();
+        // A line feed starts a new line at column 1; anything else moves
+        // one column to the right. Counter overflow panics.
+        let (line, column) = match c {
+            '\n' => (here.line.checked_add(1).unwrap(), 1),
+            _ => (here.line, here.column.checked_add(1).unwrap()),
+        };
+        let offset = here.offset + c.len_utf8();
+        self.parser().pos.set(Position {
+            offset: offset, line: line, column: column,
+        });
+        // True if and only if a character exists at the new position.
+        !self.pattern()[self.offset()..].is_empty()
+    }
+
+    /// If the substring starting at the current position of the parser has
+    /// the given prefix, then bump the parser to the character immediately
+    /// following the prefix and return true. Otherwise, don't bump the parser
+    /// and return false.
+    fn bump_if(&self, prefix: &str) -> bool {
+        if !self.pattern()[self.offset()..].starts_with(prefix) {
+            return false;
+        }
+        // One bump per scalar value keeps the line/column counters right.
+        for _ in prefix.chars() {
+            self.bump();
+        }
+        true
+    }
+
+    /// Returns true if and only if the parser is positioned at a look-around
+    /// prefix. The conditions under which this returns true must always
+    /// correspond to a regular expression that would otherwise be considered
+    /// invalid.
+    ///
+    /// This should only be called immediately after parsing the opening of
+    /// a group or a set of flags.
+    fn is_lookaround_prefix(&self) -> bool {
+        // NOTE: on a match, `bump_if` also advances past the matched prefix.
+        ["?=", "?!", "?<=", "?<!"]
+            .iter()
+            .any(|prefix| self.bump_if(prefix))
+    }
+
+    /// Bump the parser, and if the `x` flag is enabled, bump through any
+    /// subsequent spaces. Return true if and only if the parser is not at
+    /// EOF.
+    fn bump_and_bump_space(&self) -> bool {
+        // Short-circuits: if `bump` hits EOF, no whitespace is skipped.
+        self.bump() && {
+            self.bump_space();
+            !self.is_eof()
+        }
+    }
+
+    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
+    /// comments), then this will advance the parser through all whitespace
+    /// and comments to the next non-whitespace non-comment byte.
+    ///
+    /// If the `x` flag is disabled, then this is a no-op.
+    ///
+    /// This should be used selectively throughout the parser where
+    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
+    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
+    fn bump_space(&self) {
+        if !self.ignore_whitespace() {
+            return;
+        }
+        while !self.is_eof() {
+            match self.char() {
+                c if c.is_whitespace() => { self.bump(); }
+                '#' => {
+                    // The text excludes the `#` and the terminating line feed.
+                    let start = self.pos();
+                    self.bump();
+                    let mut text = String::new();
+                    while !self.is_eof() {
+                        let c = self.char();
+                        self.bump();
+                        match c {
+                            '\n' => break,
+                            _ => text.push(c),
+                        }
+                    }
+                    self.parser().comments.borrow_mut().push(ast::Comment {
+                        span: Span::new(start, self.pos()),
+                        comment: text,
+                    });
+                }
+                _ => break,
+            }
+        }
+    }
+
+    /// Peek at the next character in the input without advancing the parser.
+    ///
+    /// If the input has been exhausted, then this returns `None`.
+    fn peek(&self) -> Option<char> {
+        // Of the remaining input, the first character is the current one and
+        // the second (if any) is the one being peeked at. At EOF there is
+        // nothing left, so this yields `None`.
+        self.pattern()[self.offset()..].chars().nth(1)
+    }
+
+    /// Returns true if and only if the parser is at the end of the pattern.
+    fn is_eof(&self) -> bool {
+        self.offset() == self.pattern().len() // both are byte offsets
+    }
+
+    /// Return the current position of the parser, which includes the offset,
+    /// line and column.
+    fn pos(&self) -> Position {
+        self.parser().pos.get() // returned by value, i.e., a snapshot
+    }
+
+    /// Create a span at the current position of the parser. Both the start
+    /// and end of the span are set.
+    fn span(&self) -> Span {
+        Span::splat(self.pos()) // built from the current position alone
+    }
+
+    /// Create a span that covers the current character.
+    fn span_char(&self) -> Span {
+        let c = self.char();
+        // Like `bump`: checked arithmetic, and `\n` ends on the next line.
+        let (line, column) = if c == '\n' {
+            (self.line().checked_add(1).unwrap(), 1)
+        } else {
+            (self.line(), self.column().checked_add(1).unwrap())
+        };
+        let offset = self.offset().checked_add(c.len_utf8()).unwrap();
+        let end = Position { offset: offset, line: line, column: column };
+        Span::new(self.pos(), end)
+    }
+
+    /// Parse and push a single alternation on to the parser's internal stack.
+    /// If the top of the stack already has an alternation, then add to that
+    /// instead of pushing a new one.
+    ///
+    /// The concatenation given corresponds to a single alternation branch.
+    /// The concatenation returned starts the next branch and is empty.
+    ///
+    /// This assumes the parser is currently positioned at `|` and will advance
+    /// the parser to the character following `|`.
+    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '|');
+        // The finished branch ends just before the `|`.
+        concat.span.end = self.pos();
+        self.push_or_add_alternation(concat);
+        // Step over the `|`; the next branch starts out empty right after it.
+        self.bump();
+        let next_branch = ast::Concat { span: self.span(), asts: Vec::new() };
+        Ok(next_branch)
+    }
+
+    /// Pushes or adds the given branch of an alternation to the parser's
+    /// internal stack of state.
+    fn push_or_add_alternation(&self, concat: ast::Concat) {
+        let mut stack = self.parser().stack_group.borrow_mut();
+        match stack.last_mut() {
+            Some(&mut GroupState::Alternation(ref mut alt)) => {
+                alt.asts.push(concat.into_ast());
+                return;
+            }
+            _ => {} // nothing to extend: a new alternation is pushed below
+        }
+        let span = Span::new(concat.span.start, self.pos());
+        let alt = ast::Alternation { span: span, asts: vec![concat.into_ast()] };
+        stack.push(GroupState::Alternation(alt));
+    }
+
+    /// Parse and push a group AST (and its parent concatenation) on to the
+    /// parser's internal stack. Return a fresh concatenation corresponding
+    /// to the group's sub-AST.
+    ///
+    /// If a set of flags was found (with no group), then the concatenation
+    /// is returned with that set of flags added.
+    ///
+    /// This assumes that the parser is currently positioned on the opening
+    /// parenthesis. It advances the parser to the character at the start
+    /// of the sub-expression (or adjoining expression).
+    ///
+    /// If there was a problem parsing the start of the group, then an error
+    /// is returned.
+    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '(');
+        let group = match try!(self.parse_group()) {
+            Either::Right(group) => group,
+            Either::Left(set) => {
+                // A bare flag directive such as `(?x)`: it may change the
+                // `x` setting for what follows, and it joins the current
+                // concatenation instead of opening a group.
+                match set.flags.flag_state(ast::Flag::IgnoreWhitespace) {
+                    Some(v) => self.parser().ignore_whitespace.set(v),
+                    None => {}
+                }
+                concat.asts.push(Ast::Flags(set));
+                return Ok(concat);
+            }
+        };
+        // A real group. Remember the `x` setting in effect outside of it so
+        // that `pop_group` can restore it once the group is closed.
+        let outer = self.ignore_whitespace();
+        let inner = group
+            .flags()
+            .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
+            .unwrap_or(outer);
+        self.parser().stack_group.borrow_mut().push(GroupState::Group {
+            concat: concat,
+            group: group,
+            ignore_whitespace: outer,
+        });
+        self.parser().ignore_whitespace.set(inner);
+        Ok(ast::Concat { span: self.span(), asts: Vec::new() })
+    }
+
+    /// Pop a group AST from the parser's internal stack and set the group's
+    /// AST to the given concatenation. Return the concatenation containing
+    /// the group.
+    ///
+    /// This assumes that the parser is currently positioned on the closing
+    /// parenthesis and advances the parser to the character following the `)`.
+    ///
+    /// If no such group could be popped, then an unopened group error is
+    /// returned.
+    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), ')');
+        let mut stack = self.parser().stack_group.borrow_mut();
+        // The top of the stack is either the group being closed or, when the
+        // group's sub-expression contains a `|`, an alternation sitting
+        // directly on top of that group.
+        let (alt, below) = match stack.pop() {
+            Some(GroupState::Alternation(alt)) => (Some(alt), stack.pop()),
+            top => (None, top),
+        };
+        let (mut prior_concat, mut group, ignore_whitespace) = match below {
+            Some(GroupState::Group { concat, group, ignore_whitespace }) => {
+                (concat, group, ignore_whitespace)
+            }
+            // No group left to close (an empty stack, or nothing but an
+            // alternation): this `)` has no matching `(`.
+            None | Some(GroupState::Alternation(_)) => {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::GroupUnopened,
+                ));
+            }
+        };
+        // Leaving the group: go back to the `x` setting that was in effect
+        // when the group was opened.
+        self.parser().ignore_whitespace.set(ignore_whitespace);
+        // The sub-expression ends just before the `)` while the group
+        // itself ends just after it.
+        group_concat.span.end = self.pos();
+        self.bump();
+        group.span.end = self.pos();
+        let inner = match alt {
+            None => group_concat.into_ast(),
+            Some(mut alt) => {
+                // The given concatenation is the last branch of the
+                // alternation, which in turn is the group's sub-expression.
+                alt.span.end = group_concat.span.end;
+                alt.asts.push(group_concat.into_ast());
+                alt.into_ast()
+            }
+        };
+        group.ast = Box::new(inner);
+        // The closed group becomes the next item of the concatenation that
+        // was being built when the group was opened.
+        prior_concat.asts.push(Ast::Group(group));
+        Ok(prior_concat)
+    }
+
+    /// Pop the last state from the parser's internal stack, if it exists, and
+    /// add the given concatenation to it. There either must be no state or a
+    /// single alternation item on the stack. Any other scenario produces an
+    /// error.
+    ///
+    /// This assumes that the parser has advanced to the end.
+    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
+        concat.span.end = self.pos();
+        let mut stack = self.parser().stack_group.borrow_mut();
+        // At the end of the pattern the stack may hold at most one entry,
+        // and that entry has to be an alternation whose last branch is the
+        // given concatenation.
+        let ast = match stack.pop() {
+            None => concat.into_ast(),
+            Some(GroupState::Alternation(mut alt)) => {
+                alt.span.end = self.pos();
+                alt.asts.push(concat.into_ast());
+                Ast::Alternation(alt)
+            }
+            Some(GroupState::Group { group, .. }) => {
+                let kind = ast::ErrorKind::GroupUnclosed;
+                return Err(self.error(group.span, kind));
+            }
+        };
+        // Whatever is still on the stack was opened but never closed.
+        match stack.pop() {
+            None => Ok(ast),
+            Some(GroupState::Group { group, .. }) => {
+                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
+            }
+            Some(GroupState::Alternation(_)) => {
+                // Two alternations are never adjacent on the stack, since a
+                // new branch is added to an alternation that is already on
+                // top rather than pushed as a second one (see
+                // `push_or_add_alternation`). An alternation was popped above
+                // or the stack was empty, so none can show up here.
+                unreachable!()
+            }
+        }
+    }
+
+    /// Parse the opening of a character class and push the current class
+    /// parsing context onto the parser's stack. This assumes that the parser
+    /// is positioned at an opening `[`. The given union should correspond to
+    /// the union of set items built up before seeing the `[`.
+    ///
+    /// If there was a problem parsing the opening of the class, then an error
+    /// is returned. Otherwise, a new union of set items for the class is
+    /// returned (which may be populated with either a `]` or a `-`).
+    fn push_class_open(
+        &self,
+        parent_union: ast::ClassSetUnion,
+    ) -> Result<ast::ClassSetUnion> {
+        assert_eq!(self.char(), '[');
+        let (set, nested) = try!(self.parse_set_class_open());
+        // Park the parent's items next to the class that was just opened.
+        // `pop_class` picks both of them up again at the closing `]`.
+        let state = ClassState::Open { union: parent_union, set: set };
+        self.parser().stack_class.borrow_mut().push(state);
+        // Items of the new class are collected in the union parsed above.
+        Ok(nested)
+    }
+
+    /// Parse the end of a character class set and pop the character class
+    /// parser stack. The union given corresponds to the last union built
+    /// before seeing the closing `]`. The union returned corresponds to the
+    /// parent character class set with the nested class added to it.
+    ///
+    /// This assumes that the parser is positioned at a `]` and will advance
+    /// the parser to the byte immediately following the `]`.
+    ///
+    /// If the stack is empty after popping, then this returns the final
+    /// "top-level" character class AST (where a "top-level" character class
+    /// is one that is not nested inside any other character class).
+    ///
+    /// If there is no corresponding opening bracket on the parser's stack,
+    /// then an error is returned.
+    fn pop_class(
+        &self,
+        nested_union: ast::ClassSetUnion,
+    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+        assert_eq!(self.char(), ']');
+        // First fold the trailing items into a pending binary operator, if
+        // there is one. Afterwards the top of the stack has to be the
+        // opening of the class that is being closed.
+        let kind = self.pop_class_op(
+            ast::ClassSet::Item(nested_union.into_item()));
+        let mut stack = self.parser().stack_class.borrow_mut();
+        let (mut parent, mut set) = match stack.pop() {
+            Some(ClassState::Open { union, set }) => (union, set),
+            Some(ClassState::Op { .. }) => {
+                // Impossible: an operator on top of the stack was already
+                // popped by `pop_class_op` above, and the stack never holds
+                // two operators in a row because every push of an operator
+                // first merges with an operator that is already on top.
+                panic!("unexpected ClassState::Op")
+            }
+            None => {
+                // Impossible as well:
+                //
+                // 1) The class parser only starts upon seeing a `[`, which
+                //    puts an entry on the stack.
+                // 2) Once popping at a `]` empties the stack, the class
+                //    parser is told to stop.
+                panic!("unexpected empty character class stack")
+            }
+        };
+        // Step over the `]`. The span of the class ends right after it and
+        // its contents are whatever the fold above produced.
+        self.bump();
+        set.span.end = self.pos();
+        set.kind = kind;
+        if stack.is_empty() {
+            // Nothing encloses this class: it is the top-level result.
+            return Ok(Either::Right(ast::Class::Bracketed(set)));
+        }
+        // Otherwise the class is one more item of the class enclosing it.
+        parent.push(ast::ClassSetItem::Bracketed(Box::new(set)));
+        Ok(Either::Left(parent))
+    }
+
+    /// Return an "unclosed class" error whose span points to the most
+    /// recently opened class.
+    ///
+    /// This should only be called while parsing a character class.
+    fn unclosed_class_error(&self) -> ast::Error {
+        // Scan from the top of the stack for the most recently opened class.
+        let stack = self.parser().stack_class.borrow();
+        let innermost = stack.iter().rev().filter_map(|state| match *state {
+            ClassState::Open { ref set, .. } => Some(set.span),
+            _ => None,
+        }).next();
+        match innermost {
+            Some(span) => self.error(span, ast::ErrorKind::ClassUnclosed),
+            // While parsing a class, the stack holds at least one opening.
+            None => panic!("no open character class found"),
+        }
+    }
+
+    /// Push the current set of class items on to the class parser's stack as
+    /// the left hand side of the given operator.
+    ///
+    /// A fresh set union is returned, which should be used to build the right
+    /// hand side of this operator.
+    fn push_class_op(
+        &self,
+        next_kind: ast::ClassSetBinaryOpKind,
+        next_union: ast::ClassSetUnion,
+    ) -> ast::ClassSetUnion {
+        // If an operator is already pending, the items seen so far are its
+        // right hand side and the combined set becomes the new left hand
+        // side; otherwise the items alone are the left hand side.
+        let lhs = self.pop_class_op(
+            ast::ClassSet::Item(next_union.into_item()));
+        let pending = ClassState::Op { kind: next_kind, lhs: lhs };
+        self.parser().stack_class.borrow_mut().push(pending);
+        ast::ClassSetUnion { span: self.span(), items: Vec::new() }
+    }
+
+    /// Pop a character class set from the character class parser stack. If the
+    /// top of the stack is just an item (not an operation), then return the
+    /// given set unchanged. If the top of the stack is an operation, then the
+    /// given set will be used as the rhs of the operation on the top of the
+    /// stack. In that case, the binary operation is returned as a set.
+    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
+        let mut stack = self.parser().stack_class.borrow_mut();
+        match stack.pop() {
+            Some(ClassState::Op { kind, lhs }) => { // `rhs` completes it
+                ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                    span: Span::new(lhs.span().start, rhs.span().end),
+                    kind: kind,
+                    lhs: Box::new(lhs),
+                    rhs: Box::new(rhs),
+                })
+            }
+            Some(open @ ClassState::Open { .. }) => {
+                stack.push(open); // no operator pending: undo the pop
+                rhs
+            }
+            None => unreachable!(),
+        }
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Parse the regular expression into an abstract syntax tree.
+    fn parse(&self) -> Result<Ast> {
+        self.parse_with_comments().map(|astc| astc.ast) // comments dropped
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    fn parse_with_comments(&self) -> Result<ast::WithComments> {
+        assert_eq!(self.offset(), 0, "parser can only be used once");
+        self.parser().reset();
+        let mut concat = ast::Concat { span: self.span(), asts: Vec::new() };
+        loop {
+            // Whitespace and comments only get skipped when `x` is on.
+            self.bump_space();
+            if self.is_eof() {
+                break;
+            }
+            // Every arm consumes the concatenation built so far and yields
+            // the one to continue with (which, for groups and alternations,
+            // may be a different concatenation).
+            concat = match self.char() {
+                '(' => try!(self.push_group(concat)),
+                ')' => try!(self.pop_group(concat)),
+                '|' => try!(self.push_alternate(concat)),
+                '{' => try!(self.parse_counted_repetition(concat)),
+                // `?`, `*` and `+` differ only in the kind of repetition.
+                '?' | '*' | '+' => {
+                    let kind = match self.char() {
+                        '?' => ast::RepetitionKind::ZeroOrOne,
+                        '*' => ast::RepetitionKind::ZeroOrMore,
+                        _ => ast::RepetitionKind::OneOrMore, // i.e., `+`
+                    };
+                    try!(self.parse_uncounted_repetition(concat, kind))
+                }
+                '[' => {
+                    let class = try!(self.parse_set_class());
+                    concat.asts.push(Ast::Class(class));
+                    concat
+                }
+                _ => {
+                    let primitive = try!(self.parse_primitive());
+                    concat.asts.push(primitive.into_ast());
+                    concat
+                }
+            };
+        }
+        // Groups that are still open at this point are reported as errors.
+        let ast = try!(self.pop_group_end(concat));
+        try!(NestLimiter::new(self).check(&ast));
+        // Hand the comments collected while skipping whitespace over to the
+        // caller, leaving the parser's own list empty.
+        let comments =
+            mem::replace(&mut *self.parser().comments.borrow_mut(), vec![]);
+        Ok(ast::WithComments { ast: ast, comments: comments })
+    }
+
+    /// Parses an uncounted repetition operation. An uncounted repetition
+    /// operator includes ?, * and +, but does not include the {m,n} syntax.
+    /// The given `kind` should correspond to the operator observed by the
+    /// caller.
+    ///
+    /// This assumes that the parser is currently positioned at the repetition
+    /// operator and advances the parser to the first character after the
+    /// operator. (Note that the operator may include a single additional `?`,
+    /// which makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    fn parse_uncounted_repetition(
+        &self,
+        mut concat: ast::Concat,
+        kind: ast::RepetitionKind,
+    ) -> Result<ast::Concat> {
+        assert!(
+            self.char() == '?' || self.char() == '*' || self.char() == '+');
+        let op_start = self.pos();
+        // The operator applies to the item that was parsed right before it.
+        let operand = try!(concat.asts.pop().ok_or_else(|| {
+            self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+        }));
+        // A `?` immediately after the operator makes it ungreedy. Because
+        // of short-circuiting, nothing is inspected once `bump` hits EOF.
+        let greedy = !(self.bump() && self.char() == '?');
+        if !greedy {
+            self.bump();
+        }
+        // Both spans end after the operator, including its optional `?`.
+        let end = self.pos();
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: operand.span().with_end(end),
+            op: ast::RepetitionOp {
+                span: Span::new(op_start, end),
+                kind: kind,
+            },
+            greedy: greedy,
+            ast: Box::new(operand),
+        }));
+        Ok(concat)
+    }
+
+    /// Parses a counted repetition operation. A counted repetition operator
+    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
+    /// operators.
+    ///
+    /// This assumes that the parser is currently positioned at the opening `{`
+    /// and advances the parser to the first character after the operator.
+    /// (Note that the operator may include a single additional `?`, which
+    /// makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    fn parse_counted_repetition(
+        &self,
+        mut concat: ast::Concat,
+    ) -> Result<ast::Concat> {
+        assert!(self.char() == '{');
+        let start = self.pos();
+        // The operator applies to the item that was parsed right before it.
+        let operand = try!(concat.asts.pop().ok_or_else(|| {
+            self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+        }));
+        // Each way of failing to reach the closing `}` is reported alike:
+        // with a span from the `{` up to wherever the parser stopped. The
+        // position is read when the error is built, not here.
+        let unclosed = || {
+            self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            )
+        };
+        if !self.bump_and_bump_space() {
+            return Err(unclosed());
+        }
+        let count_start = try!(self.parse_decimal());
+        if self.is_eof() {
+            return Err(unclosed());
+        }
+        // `{m}` is exact, `{m,}` is open ended and `{m,n}` is bounded.
+        let range = if self.char() != ',' {
+            ast::RepetitionRange::Exactly(count_start)
+        } else {
+            if !self.bump_and_bump_space() {
+                return Err(unclosed());
+            }
+            if self.char() == '}' {
+                ast::RepetitionRange::AtLeast(count_start)
+            } else {
+                let count_end = try!(self.parse_decimal());
+                ast::RepetitionRange::Bounded(count_start, count_end)
+            }
+        };
+        if self.is_eof() || self.char() != '}' {
+            return Err(unclosed());
+        }
+        // A `?` after the `}` makes the operator ungreedy. Note that any
+        // whitespace between the two is skipped when the `x` flag is on.
+        let greedy = !(self.bump_and_bump_space() && self.char() == '?');
+        if !greedy {
+            self.bump();
+        }
+        let end = self.pos();
+        let op_span = Span::new(start, end);
+        // An invalid range is reported with the span of the whole
+        // operator.
+        if !range.is_valid() {
+            return Err(self.error(
+                op_span,
+                ast::ErrorKind::RepetitionCountInvalid,
+            ));
+        }
+        // The repetition as a whole spans from the start of its operand to
+        // the end of the operator.
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: operand.span().with_end(end),
+            op: ast::RepetitionOp {
+                span: op_span,
+                kind: ast::RepetitionKind::Range(range),
+            },
+            greedy: greedy,
+            ast: Box::new(operand),
+        }));
+        Ok(concat)
+    }
+
+    /// Parse a group (which contains a sub-expression) or a set of flags.
+    ///
+    /// If a group was found, then it is returned with an empty AST. If a set
+    /// of flags is found, then that set is returned.
+    ///
+    /// The parser should be positioned at the opening parenthesis.
+    ///
+    /// This advances the parser to the first character of the
+    /// sub-expression (in the case of a group) or to the character
+    /// immediately following the closing parenthesis of the set of flags.
+    ///
+    /// # Errors
+    ///
+    /// If flags are given and incorrectly specified, then a corresponding
+    /// error is returned.
+    ///
+    /// If a capture name is given and it is incorrectly specified, then a
+    /// corresponding error is returned.
+    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
+        assert_eq!(self.char(), '(');
+        let open_span = self.span_char();
+        self.bump();
+        self.bump_space();
+        // Look-around is not supported, but it is called out specifically
+        // instead of being reported as some generic syntax error. The check
+        // bumps past the prefix, so the span covers `(` and the prefix.
+        if self.is_lookaround_prefix() {
+            let span = Span::new(open_span.start, self.span().end);
+            let err = ast::ErrorKind::UnsupportedLookAround;
+            return Err(self.error(span, err));
+        }
+        // Where the text after `(` begins; used to report an empty `(?)`.
+        let inner_span = self.span();
+        // Work out what kind of group this is. A set of flags that is closed
+        // right away, as in `(?i)`, is no group at all and returns early.
+        let kind = if self.bump_if("?P<") {
+            // A named capture group: `(?P<name>...)`. The index is claimed
+            // before the name is parsed.
+            let index = try!(self.next_capture_index(open_span));
+            ast::GroupKind::CaptureName(try!(self.parse_capture_name(index)))
+        } else if self.bump_if("?") {
+            // Flags, either as a directive `(?flags)` or as the start of a
+            // non-capturing group `(?flags:...)`.
+            if self.is_eof() {
+                let err = ast::ErrorKind::GroupUnclosed;
+                return Err(self.error(open_span, err));
+            }
+            let flags = try!(self.parse_flags());
+            // `parse_flags` stops at either `)` or `:`. Step over it.
+            let char_end = self.char();
+            self.bump();
+            if char_end == ')' {
+                // We don't allow empty flags, e.g., `(?)`. We instead
+                // interpret it as a repetition operator missing its argument.
+                if flags.items.is_empty() {
+                    let err = ast::ErrorKind::RepetitionMissing;
+                    return Err(self.error(inner_span, err));
+                }
+                return Ok(Either::Left(ast::SetFlags {
+                    span: Span { end: self.pos(), ..open_span },
+                    flags: flags,
+                }));
+            }
+            assert_eq!(char_end, ':');
+            ast::GroupKind::NonCapturing(flags)
+        } else {
+            // A plain capture group: `(...)`.
+            let index = try!(self.next_capture_index(open_span));
+            ast::GroupKind::CaptureIndex(index)
+        };
+        // The span only covers the `(` for now and the sub-expression is a
+        // placeholder. Both get completed when the group is closed.
+        Ok(Either::Right(ast::Group {
+            span: open_span,
+            kind: kind,
+            ast: Box::new(Ast::Empty(self.span())),
+        }))
+    }
+
+    /// Parses a capture group name. Assumes that the parser is positioned at
+    /// the first character in the name following the opening `<` (and may
+    /// possibly be EOF). This advances the parser to the first character
+    /// following the closing `>`.
+    ///
+    /// The caller must provide the capture index of the group for this name.
+    fn parse_capture_name(
+        &self,
+        capture_index: u32,
+    ) -> Result<ast::CaptureName> {
+        // Running out of input right at the start means that the name is
+        // missing altogether.
+        if self.is_eof() {
+            let err = ast::ErrorKind::GroupNameUnexpectedEof;
+            return Err(self.error(self.span(), err));
+        }
+        let start = self.pos();
+        // Consume name characters up to (but not including) the closing
+        // `>`, or up to EOF if the `>` is missing.
+        while !self.is_eof() && self.char() != '>' {
+            // `is_capture_char` is told whether this is the first character
+            // of the name.
+            let is_first = self.pos() == start;
+            if !is_capture_char(self.char(), is_first) {
+                let err = ast::ErrorKind::GroupNameInvalid;
+                return Err(self.error(self.span_char(), err));
+            }
+            self.bump();
+        }
+        let end = self.pos();
+        // The loop ends at the closing `>` or at EOF, and only the former
+        // is acceptable.
+        if self.is_eof() {
+            let err = ast::ErrorKind::GroupNameUnexpectedEof;
+            return Err(self.error(self.span(), err));
+        }
+        // Step over the `>`.
+        assert_eq!(self.char(), '>');
+        self.bump();
+        // The name is everything in between. An empty name, as in
+        // `(?P<>a)`, is rejected.
+        let name = &self.pattern()[start.offset..end.offset];
+        if name.is_empty() {
+            let err = ast::ErrorKind::GroupNameEmpty;
+            return Err(self.error(Span::new(start, start), err));
+        }
+        let capname = ast::CaptureName {
+            span: Span::new(start, end),
+            name: name.to_string(),
+            index: capture_index,
+        };
+        // Registering the name fails if another group already uses it.
+        try!(self.add_capture_name(&capname));
+        Ok(capname)
+    }
+
+    /// Parse a sequence of flags starting at the current character.
+    ///
+    /// This advances the parser to the character immediately following the
+    /// flags, which is guaranteed to be either `:` or `)`.
+    ///
+    /// # Errors
+    ///
+    /// If any flags are duplicated, then an error is returned.
+    ///
+    /// If the negation operator is used more than once, then an error is
+    /// returned.
+    ///
+    /// If the negation operator is not followed by any flags, then an error
+    /// is returned. (An empty sequence of flags is returned as-is.)
+    fn parse_flags(&self) -> Result<ast::Flags> {
+        // The span starts out empty at the current position; its end is set
+        // once all of the flags have been consumed.
+        let mut flags = ast::Flags { span: self.span(), items: Vec::new() };
+        // Span of the `-`, for as long as it is the last item that was seen.
+        let mut dangling = None;
+        // Stop at the delimiter and leave it for the caller to consume.
+        while self.char() != ':' && self.char() != ')' {
+            // Every item, be it a flag or `-`, is exactly one character.
+            let item_span = self.span_char();
+            let is_negation = self.char() == '-';
+            let kind = if is_negation {
+                // The negation operator is recorded as an item of its own.
+                ast::FlagsItemKind::Negation
+            } else {
+                // Anything other than `-` has to be a known flag character.
+                ast::FlagsItemKind::Flag(try!(self.parse_flag()))
+            };
+            dangling = if is_negation { Some(item_span) } else { None };
+            let item = ast::FlagsItem { span: item_span, kind: kind };
+            // `add_item` returns the index of an earlier item that the new
+            // one clashes with, if there is one. That item's span is
+            // reported alongside the span of the offending character.
+            if let Some(i) = flags.add_item(item) {
+                let original = flags.items[i].span;
+                let err = if is_negation {
+                    ast::ErrorKind::FlagRepeatedNegation { original: original }
+                } else {
+                    ast::ErrorKind::FlagDuplicate { original: original }
+                };
+                return Err(self.error(item_span, err));
+            }
+            // Move on to the next character. Running out of input here
+            // means that the flags were never terminated.
+            if !self.bump() {
+                return Err(self.error(
+                    self.span(),
+                    ast::ErrorKind::FlagUnexpectedEof,
+                ));
+            }
+        }
+        // A `-` has to be followed by at least one flag, so a negation that
+        // is directly followed by the delimiter is an error.
+        if let Some(span) = dangling {
+            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
+        }
+        flags.span.end = self.pos();
+        Ok(flags)
+    }
+
+    /// Read the current character as a flag. The parser is not advanced.
+    ///
+    /// A character that does not name a flag produces a `FlagUnrecognized`
+    /// error built from `span_char()`.
+    fn parse_flag(&self) -> Result<ast::Flag> {
+        let flag = match self.char() {
+            'i' => ast::Flag::CaseInsensitive,
+            'm' => ast::Flag::MultiLine,
+            's' => ast::Flag::DotMatchesNewLine,
+            'U' => ast::Flag::SwapGreed,
+            'u' => ast::Flag::Unicode,
+            'x' => ast::Flag::IgnoreWhitespace,
+            _ => {
+                let kind = ast::ErrorKind::FlagUnrecognized;
+                return Err(self.error(self.span_char(), kind));
+            }
+        };
+        Ok(flag)
+    }
+
+    /// Parse a primitive AST node: an escape sequence, `.`, `^`, `$` or a
+    /// verbatim literal.
+    ///
+    /// Callers are expected to have ruled out every non-primitive
+    /// construct already, so any character not listed above is taken
+    /// literally. For example, a `|` at the current position comes back as
+    /// a literal `|` (as it would inside a character class).
+    ///
+    /// The cases are:
+    ///
+    /// * `\` hands off to `parse_escape`.
+    /// * `.` becomes `Primitive::Dot`.
+    /// * `^` becomes a `StartLine` assertion.
+    /// * `$` becomes an `EndLine` assertion.
+    /// * Anything else becomes a `Verbatim` literal.
+    ///
+    /// On return the parser sits on the first character after the
+    /// primitive.
+    fn parse_primitive(&self) -> Result<Primitive> {
+        let c = self.char();
+        if c == '\\' {
+            return self.parse_escape();
+        }
+        // Every remaining primitive consumes exactly the current character,
+        // so record its span first and step over it before building the node.
+        let span = self.span_char();
+        self.bump();
+        let prim = match c {
+            '.' => Primitive::Dot(span),
+            '^' => Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::StartLine,
+            }),
+            '$' => Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::EndLine,
+            }),
+            _ => Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Verbatim,
+                c: c,
+            }),
+        };
+        Ok(prim)
+    }
+
+    /// Parse an escape sequence as a primitive AST. The parser must be at the
+    /// leading `\` (asserted) and is advanced just past the escape sequence.
+    ///
+    /// Fails, among other cases, on EOF right after `\`, on digit escapes
+    /// when octal syntax is disabled, and on unrecognized escapes.
+    fn parse_escape(&self) -> Result<Primitive> {
+        assert_eq!(self.char(), '\\');
+        let start = self.pos();
+        if !self.bump() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let c = self.char();
+        // Multi-character escapes go to helpers, which do their own bumping.
+        match c {
+            '0'...'7' => {
+                if !self.parser().octal {
+                    return Err(self.error(
+                        Span::new(start, self.span_char().end),
+                        ast::ErrorKind::UnsupportedBackreference,
+                    ));
+                }
+                let mut lit = self.parse_octal();
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            '8'...'9' if !self.parser().octal => {
+                return Err(self.error(
+                    Span::new(start, self.span_char().end),
+                    ast::ErrorKind::UnsupportedBackreference,
+                ));
+            }
+            'x' | 'u' | 'U' => {
+                let mut lit = try!(self.parse_hex());
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            'p' | 'P' => {
+                let mut cls = try!(self.parse_unicode_class());
+                cls.span.start = start;
+                return Ok(Primitive::Unicode(cls));
+            }
+            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
+                let mut cls = self.parse_perl_class();
+                cls.span.start = start;
+                return Ok(Primitive::Perl(cls));
+            }
+            _ => {}
+        }
+
+        // One letter sequences are handled inline: bump past the letter now.
+        self.bump();
+        let span = Span::new(start, self.pos());
+        if is_meta_character(c) {
+            return Ok(Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Punctuation,
+                c: c,
+            }));
+        }
+        let special = |kind, c| Ok(Primitive::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Special(kind),
+            c: c,
+        }));
+        match c {
+            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
+            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
+            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
+            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
+            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
+            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
+            ' ' if self.ignore_whitespace() => {
+                special(ast::SpecialLiteralKind::Space, ' ')
+            }
+            'A' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::StartText,
+            })),
+            'z' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::EndText,
+            })),
+            'b' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::WordBoundary,
+            })),
+            'B' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::NotWordBoundary,
+            })),
+            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
+        }
+    }
+
+    /// Parse an octal representation of a Unicode codepoint up to 3 digits
+    /// long. This expects the parser to be positioned at the first octal
+    /// digit and advances the parser to the first character immediately
+    /// following the octal number. This also assumes that parsing octal
+    /// escapes is enabled.
+    ///
+    /// Both preconditions are asserted; when they hold, this cannot fail.
+    fn parse_octal(&self) -> ast::Literal {
+        use std::char;
+        use std::u32;
+
+        assert!(self.parser().octal);
+        assert!('0' <= self.char() && self.char() <= '7');
+        let start = self.pos();
+        // Parse up to two more digits; the empty loop body is intentional.
+        while
+            self.bump() &&
+            '0' <= self.char() && self.char() <= '7' &&
+            self.pos().offset - start.offset <= 2
+        {}
+        let end = self.pos();
+        let octal = &self.pattern()[start.offset..end.offset];
+        // Parsing the octal should never fail since the above guarantees a
+        // valid number.
+        let codepoint =
+            u32::from_str_radix(octal, 8).expect("valid octal number");
+        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
+        // invalid Unicode scalar values.
+        let c = char::from_u32(codepoint).expect("Unicode scalar value");
+        ast::Literal {
+            span: Span::new(start, end),
+            kind: ast::LiteralKind::Octal,
+            c: c,
+        }
+    }
+
+    /// Parse a hex representation of a Unicode codepoint. This handles both
+    /// the fixed-width (e.g., `\xFF`) and braced (e.g., `\x{FFFF}`) notations.
+    /// The parser must be at the `x`, `u` or `U` prefix and is advanced to
+    /// the first character immediately following the hexadecimal literal.
+    fn parse_hex(&self) -> Result<ast::Literal> {
+        assert!(self.char() == 'x'
+                || self.char() == 'u'
+                || self.char() == 'U');
+
+        let hex_kind = match self.char() {
+            'x' => ast::HexLiteralKind::X,
+            'u' => ast::HexLiteralKind::UnicodeShort,
+            _ => ast::HexLiteralKind::UnicodeLong,
+        };
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        if self.char() == '{' {
+            self.parse_hex_brace(hex_kind)
+        } else {
+            self.parse_hex_digits(hex_kind)
+        }
+    }
+
+    /// Parse an N-digit hex representation of a Unicode codepoint, where N is
+    /// `kind.digits()`: 2 (for `\xNN`), 4 (for `\uNNNN`) or 8 (for
+    /// `\UNNNNNNNN`). This expects the parser to be positioned at the first
+    /// digit and will advance it past the last one.
+    ///
+    /// Fails on EOF, on a non-hex digit, or when the digits do not denote a
+    /// Unicode scalar value.
+    fn parse_hex_digits(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let start = self.pos();
+        for i in 0..kind.digits() {
+            if i > 0 && !self.bump_and_bump_space() {
+                return Err(self.error(
+                    self.span(),
+                    ast::ErrorKind::EscapeUnexpectedEof,
+                ));
+            }
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        // The final bump just moves the parser past the literal, which may
+        // be EOF.
+        self.bump_and_bump_space();
+        let end = self.pos();
+        let hex = scratch.as_str();
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, end),
+                kind: ast::LiteralKind::HexFixed(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a braced hex escape such as `\x{10FFFF}`. The parser must be at
+    /// the opening `{` and is advanced past the closing `}`. Fails on EOF, on
+    /// a non-hex digit, on `{}` and on values that are not Unicode scalars.
+    fn parse_hex_brace(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let brace_pos = self.pos();
+        let start = self.span_char().end;
+        while self.bump_and_bump_space() && self.char() != '}' {
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        if self.is_eof() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let end = self.pos();
+        let hex = scratch.as_str();
+        assert_eq!(self.char(), '}');
+        self.bump_and_bump_space();
+
+        if hex.is_empty() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeHexEmpty,
+            ));
+        }
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, self.pos()),
+                kind: ast::LiteralKind::HexBrace(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a decimal number into a u32 while trimming leading and trailing
+    /// whitespace.
+    ///
+    /// This expects the parser to be positioned at the first position where
+    /// a decimal digit could occur. This will advance the parser past the
+    /// last contiguous decimal digit and any whitespace that follows it.
+    ///
+    /// If no decimal digit could be found or if there was a problem parsing
+    /// the complete set of digits into a u32, then an error is returned.
+    fn parse_decimal(&self) -> Result<u32> {
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump();
+        }
+        let start = self.pos();
+        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
+            scratch.push(self.char());
+            self.bump_and_bump_space();
+        }
+        let span = Span::new(start, self.pos());
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump_and_bump_space();
+        }
+        let digits = scratch.as_str();
+        if digits.is_empty() {
+            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
+        }
+        match u32::from_str_radix(digits, 10).ok() {
+            Some(n) => Ok(n),
+            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
+        }
+    }
+
+    /// Parse a standard character class consisting primarily of characters or
+    /// character ranges, but can also contain nested character classes of
+    /// any type (sans `.`).
+    ///
+    /// This assumes the parser is positioned at the opening `[` and, on
+    /// success, leaves it immediately after the matching closing `]`.
+    /// Hitting EOF before that `]` is reported as an unclosed class.
+    fn parse_set_class(&self) -> Result<ast::Class> {
+        assert_eq!(self.char(), '[');
+
+        let mut union = ast::ClassSetUnion {
+            span: self.span(),
+            items: vec![],
+        };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                return Err(self.unclosed_class_error());
+            }
+            match self.char() {
+                '[' => {
+                    // If we've already parsed the opening bracket, then
+                    // attempt to treat this as the beginning of an ASCII
+                    // class. If ASCII class parsing fails, then the parser
+                    // backs up to `[`.
+                    if !self.parser().stack_class.borrow().is_empty() {
+                        if let Some(cls) = self.maybe_parse_ascii_class() {
+                            union.push(ast::ClassSetItem::Ascii(cls));
+                            continue;
+                        }
+                    }
+                    union = try!(self.push_class_open(union));
+                }
+                ']' => {
+                    match try!(self.pop_class(union)) {
+                        Either::Left(nested_union) => { union = nested_union; }
+                        Either::Right(class) => return Ok(class),
+                    }
+                }
+                '&' if self.peek() == Some('&') => {
+                    assert!(self.bump_if("&&"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Intersection, union);
+                }
+                '-' if self.peek() == Some('-') => {
+                    assert!(self.bump_if("--"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Difference, union);
+                }
+                '~' if self.peek() == Some('~') => {
+                    assert!(self.bump_if("~~"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::SymmetricDifference, union);
+                }
+                _ => {
+                    union.push(try!(self.parse_set_class_range()));
+                }
+            }
+        }
+    }
+
+    /// Parse a single primitive item in a character class set. The item to
+    /// be parsed can either be one of a simple literal character, a range
+    /// between two simple literal characters or a "primitive" character
+    /// class like `\w` or `\p{Greek}`.
+    ///
+    /// An error is returned for an invalid escape, for a character class
+    /// where a simple literal is expected (e.g., in a range), for a range
+    /// that fails `is_valid` and for EOF before the class is closed.
+    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
+        let prim1 = try!(self.parse_set_class_item());
+        self.bump_space();
+        if self.is_eof() {
+            return Err(self.unclosed_class_error());
+        }
+        // If the next char isn't a `-`, then we don't have a range.
+        // There are two exceptions. If the char after a `-` is a `]`, then
+        // `-` is interpreted as a literal `-`. Alternatively, if the char
+        // after a `-` is a `-`, then `--` corresponds to a "difference"
+        // operation.
+        if self.char() != '-'
+            || self.peek() == Some(']')
+            || self.peek() == Some('-')
+        {
+            return prim1.into_class_set_item(self);
+        }
+        // OK, now we're parsing a range, so bump past the `-` and parse the
+        // second half of the range.
+        if !self.bump_and_bump_space() {
+            return Err(self.unclosed_class_error());
+        }
+        let prim2 = try!(self.parse_set_class_item());
+        let range = ast::ClassSetRange {
+            span: Span::new(prim1.span().start, prim2.span().end),
+            start: try!(prim1.into_class_literal(self)),
+            end: try!(prim2.into_class_literal(self)),
+        };
+        if !range.is_valid() {
+            return Err(self.error(
+                range.span,
+                ast::ErrorKind::ClassRangeInvalid,
+            ));
+        }
+        Ok(ast::ClassSetItem::Range(range))
+    }
+
+    /// Parse one item of a character class as a primitive: either a single
+    /// escape sequence or the current character taken verbatim.
+    ///
+    /// The parser must be positioned at the start of the item. On success it
+    /// is left on the first position after the item.
+    ///
+    /// No check is made here that the primitive is legal inside a class;
+    /// reporting an illegal primitive is the caller's job.
+    ///
+    /// A leading `\` delegates to `parse_escape`, whose errors are passed
+    /// through unchanged. The verbatim path itself cannot fail.
+    fn parse_set_class_item(&self) -> Result<Primitive> {
+        if self.char() == '\\' {
+            return self.parse_escape();
+        }
+        let lit = ast::Literal {
+            span: self.span_char(),
+            kind: ast::LiteralKind::Verbatim,
+            c: self.char(),
+        };
+        self.bump();
+        Ok(Primitive::Literal(lit))
+    }
+
+    /// Parses the opening of a character class set. This includes the opening
+    /// bracket along with `^` if present to indicate negation. This also
+    /// starts parsing the opening set of unioned items if applicable, since
+    /// there are special rules applied to certain characters in the opening
+    /// of a character class. For example, `[^]]` is the class of all
+    /// characters not equal to `]`. (`]` would need to be escaped in any other
+    /// position.) Similarly for `-`.
+    ///
+    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
+    /// empty union. This empty union should be replaced with the actual item
+    /// when it is popped from the parser's stack.
+    ///
+    /// This assumes the parser is positioned at the opening `[` and advances
+    /// the parser to the first non-special byte of the character class.
+    ///
+    /// A `ClassUnclosed` error is returned if EOF is found.
+    fn parse_set_class_open(
+        &self,
+    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
+        assert_eq!(self.char(), '[');
+        let start = self.pos();
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::ClassUnclosed,
+            ));
+        }
+
+        let negated =
+            if self.char() != '^' {
+                false
+            } else {
+                if !self.bump_and_bump_space() {
+                    return Err(self.error(
+                        Span::new(start, self.pos()),
+                        ast::ErrorKind::ClassUnclosed,
+                    ));
+                }
+                true
+            };
+        // Accept any number of leading `-` as literal `-`.
+        let mut union = ast::ClassSetUnion {
+            span: self.span(),
+            items: vec![],
+        };
+        while self.char() == '-' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: '-',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        // If `]` is the *first* char in a set, then interpret it as a literal
+        // `]`. That is, an empty class is impossible to write.
+        if union.items.is_empty() && self.char() == ']' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: ']',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        let set = ast::ClassBracketed {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: ast::ClassSet::union(ast::ClassSetUnion {
+                span: Span::new(union.span.start, union.span.start),
+                items: vec![],
+            }),
+        };
+        Ok((set, union))
+    }
+
+    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
+    ///
+    /// This assumes the parser is positioned at the opening `[`.
+    ///
+    /// If no valid ASCII character class could be found, then this does not
+    /// advance the parser and `None` is returned. Otherwise, the parser is
+    /// advanced to the first byte following the closing `]` and the
+    /// corresponding ASCII class is returned.
+    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
+        // ASCII character classes are interesting from a parsing perspective
+        // because parsing cannot fail with any interesting error. For example,
+        // in order to use an ASCII character class, it must be enclosed in
+        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
+        // of it as "ASCII character classes have the syntax `[:NAME:]`
+        // which can only appear within character brackets." This means that
+        // things like `[[:lower:]A]` are legal constructs.
+        //
+        // However, if one types an incorrect ASCII character class, e.g.,
+        // `[[:loower:]]`, then we treat that as a normal nested character
+        // class containing the characters `:elorw`. One might argue that we
+        // should return an error instead since the repeated colons give away
+        // the intent to write an ASCII class. But what if the user typed
+        // `[[:lower]]` instead? How can we tell that was intended to be an
+        // ASCII class and not just a normal nested class?
+        //
+        // Reasonable people can probably disagree over this, but for better
+        // or worse, we implement semantics that never fail at the expense
+        // of better failure modes.
+        assert_eq!(self.char(), '[');
+        // If parsing fails, then we back up the parser to this starting point.
+        let start = self.pos();
+        let mut negated = false;
+        if !self.bump() || self.char() != ':' {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if !self.bump() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if self.char() == '^' {
+            negated = true;
+            if !self.bump() {
+                self.parser().pos.set(start);
+                return None;
+            }
+        }
+        let name_start = self.offset();
+        while self.char() != ':' && self.bump() {}
+        if self.is_eof() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let name = &self.pattern()[name_start..self.offset()];
+        if !self.bump_if(":]") {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let kind = match ast::ClassAsciiKind::from_name(name) {
+            Some(kind) => kind,
+            None => {
+                self.parser().pos.set(start);
+                return None;
+            }
+        };
+        Some(ast::ClassAscii {
+            span: Span::new(start, self.pos()),
+            kind: kind,
+            negated: negated,
+        })
+    }
+
+    /// Parse a Unicode class in either the single character notation, `\pN`
+    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
+    /// the parser is positioned at the `p` (or `P` for negation) and will
+    /// advance the parser to the character immediately following the class.
+    /// A bracketed name is split on the first `!=`, `:` or `=` (tried in that
+    /// order) into name and value, but neither is checked for validity.
+    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
+        assert!(self.char() == 'p' || self.char() == 'P');
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let negated = self.char() == 'P';
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let (start, kind) =
+            if self.char() == '{' {
+                let start = self.span_char().end;
+                while self.bump_and_bump_space() && self.char() != '}' {
+                    scratch.push(self.char());
+                }
+                if self.is_eof() {
+                    return Err(self.error(
+                        self.span(),
+                        ast::ErrorKind::EscapeUnexpectedEof,
+                    ));
+                }
+                assert_eq!(self.char(), '}');
+                self.bump();
+
+                let name = scratch.as_str();
+                if let Some(i) = name.find("!=") {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::NotEqual,
+                        name: name[..i].to_string(),
+                        value: name[i+2..].to_string(),
+                    })
+                } else if let Some(i) = name.find(':') {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Colon,
+                        name: name[..i].to_string(),
+                        value: name[i+1..].to_string(),
+                    })
+                } else if let Some(i) = name.find('=') {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Equal,
+                        name: name[..i].to_string(),
+                        value: name[i+1..].to_string(),
+                    })
+                } else {
+                    (start, ast::ClassUnicodeKind::Named(name.to_string()))
+                }
+            } else {
+                let start = self.pos();
+                let c = self.char();
+                self.bump_and_bump_space();
+                let kind = ast::ClassUnicodeKind::OneLetter(c);
+                (start, kind)
+            };
+        Ok(ast::ClassUnicode {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: kind,
+        })
+    }
+
+    /// Parse one of the Perl classes `\d`, `\D`, `\s`, `\S`, `\w` or `\W`.
+    /// The parser must sit on the class letter and is bumped past it. Any
+    /// other letter is treated as a caller bug and triggers a panic.
+    fn parse_perl_class(&self) -> ast::ClassPerl {
+        let letter = self.char();
+        let class_span = self.span_char();
+        self.bump();
+        let (kind, negated) = match letter {
+            'd' => (ast::ClassPerlKind::Digit, false),
+            'D' => (ast::ClassPerlKind::Digit, true),
+            's' => (ast::ClassPerlKind::Space, false),
+            'S' => (ast::ClassPerlKind::Space, true),
+            'w' => (ast::ClassPerlKind::Word, false),
+            'W' => (ast::ClassPerlKind::Word, true),
+            other => panic!("expected valid Perl class but got '{}'", other),
+        };
+        ast::ClassPerl { span: class_span, kind: kind, negated: negated }
+    }
+}
+
+/// A type that traverses a fully parsed Ast and checks whether its depth
+/// exceeds the parser's nesting limit. If it does, then an error is returned.
+#[derive(Debug)]
+struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
+    /// The parser that supplies the nest limit and constructs the errors.
+    p: &'p ParserI<'s, P>,
+    /// The current depth while walking an Ast. Starts at zero.
+    depth: u32,
+}
+
+impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
+    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
+        NestLimiter { p: p, depth: 0 }
+    }
+
+    /// Run this limiter over `ast` via `ast::visit`, consuming the limiter.
+    fn check(self, ast: &Ast) -> Result<()> {
+        ast::visit(ast, self)
+    }
+
+    /// Enter one nesting level; errors at `span` if that exceeds the limit.
+    fn increment_depth(&mut self, span: &Span) -> Result<()> {
+        let new = try!(self.depth.checked_add(1).ok_or_else(|| self.p.error(
+            span.clone(),
+            ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
+        )));
+        let limit = self.p.parser().nest_limit;
+        if new > limit {
+            let kind = ast::ErrorKind::NestLimitExceeded(limit);
+            return Err(self.p.error(span.clone(), kind));
+        }
+        self.depth = new;
+        Ok(())
+    }
+
+    /// Leave one nesting level. Panics if the depth would drop below zero.
+    fn decrement_depth(&mut self) {
+        // `checked_sub` only returns the new value, so it must be stored.
+        self.depth = self.depth.checked_sub(1).unwrap();
+    }
+}
+
+impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
+    type Output = ();
+    type Err = ast::Error;
+
+    fn finish(self) -> Result<()> {
+        Ok(()) // Nothing to produce; errors surface earlier, e.g. `visit_pre`.
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        let span = match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            Ast::Class(ast::Class::Bracketed(_))
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        let span = match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            ast::ClassSetItem::Bracketed(ref x) => &x.span,
+            ast::ClassSetItem::Union(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            ast::ClassSetItem::Bracketed(_)
+            | ast::ClassSetItem::Union(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.increment_depth(&ast.span)
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.decrement_depth();
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use ast::{self, Ast, Position, Span};
+    use super::{Parser, ParserI, ParserBuilder, Primitive};
+
+    // Our own assert_eq, which has slightly better formatting (but honestly
+    // still kind of crappy).
+    //
+    // This shadows the standard `assert_eq!` for the remainder of this
+    // module. Only the two-argument form is supported (no custom message).
+    // On failure, both operands are printed with `{:?}` on their own lines.
+    macro_rules! assert_eq {
+        ($left:expr, $right:expr) => ({
+            // Each operand expression is evaluated exactly once and only
+            // borrowed for the comparison.
+            match (&$left, &$right) {
+                (left_val, right_val) => {
+                    if !(*left_val == *right_val) {
+                        panic!("assertion failed: `(left == right)`\n\n\
+                               left:  `{:?}`\nright: `{:?}`\n\n",
+                               left_val, right_val)
+                    }
+                }
+            }
+        });
+    }
+
+    // We create these errors to compare with real ast::Errors in the tests.
+    // We define equality between TestError and ast::Error to disregard the
+    // pattern string in ast::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        // The span the error is expected to be reported at.
+        span: Span,
+        // The kind of error that is expected.
+        kind: ast::ErrorKind,
+    }
+
+    impl PartialEq<ast::Error> for TestError {
+        fn eq(&self, other: &ast::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    // The same comparison as above, with the operands flipped, so that a
+    // real error may appear on the left-hand side of an assertion.
+    impl PartialEq<TestError> for ast::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            let same_span = self.span == other.span;
+            same_span && self.kind == other.kind
+        }
+    }
+
+    /// Short alias for turning a string slice into an owned `String`.
+    fn s(str: &str) -> String {
+        String::from(str)
+    }
+
+    /// Create an internal parser for the given pattern, backed by a
+    /// `Parser` built with `Parser::new()`.
+    fn parser(pattern: &str) -> ParserI<Parser> {
+        ParserI::new(Parser::new(), pattern)
+    }
+
+    /// Create an internal parser for the given pattern whose builder has
+    /// the `octal` option set to true.
+    fn parser_octal(pattern: &str) -> ParserI<Parser> {
+        ParserI::new(ParserBuilder::new().octal(true).build(), pattern)
+    }
+
+    /// Create an internal parser for the given pattern that is configured
+    /// with the given nest limit.
+    fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
+        let mut builder = ParserBuilder::new();
+        builder.nest_limit(nest_limit);
+        ParserI::new(builder.build(), pattern)
+    }
+
+    /// Create an internal parser for the given pattern whose builder has
+    /// the `ignore_whitespace` option set to true.
+    fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
+        let mut builder = ParserBuilder::new();
+        builder.ignore_whitespace(true);
+        ParserI::new(builder.build(), pattern)
+    }
+
+    /// Short alias for creating a new span.
+    ///
+    /// Use this (together with `npos`) when the line and column numbers
+    /// must be spelled out explicitly, e.g., for multi-line patterns.
+    fn nspan(start: Position, end: Position) -> Span {
+        Span::new(start, end)
+    }
+
+    /// Short alias for creating a new position.
+    ///
+    /// The arguments are forwarded to `Position::new` in the same order:
+    /// offset, line and column.
+    fn npos(offset: usize, line: usize, column: usize) -> Position {
+        Position::new(offset, line, column)
+    }
+
+    /// Build a span from a range of offsets, assuming the whole pattern
+    /// sits on line 1 and that every column is simply its offset plus one.
+    /// That assumption holds for single-line ASCII patterns, which covers
+    /// most tests.
+    fn span(range: Range<usize>) -> Span {
+        let Range { start: lo, end: hi } = range;
+        Span::new(
+            Position::new(lo, 1, lo + 1),
+            Position::new(hi, 1, hi + 1),
+        )
+    }
+
+    /// Build the span covering the given byte range of `subject`, deriving
+    /// line and column numbers from the text that precedes each endpoint.
+    fn span_range(subject: &str, range: Range<usize>) -> Span {
+        let position_at = |offset: usize| {
+            let prefix = &subject[..offset];
+            // Lines are 1-based: one more than the number of line breaks
+            // that come before the offset.
+            let line = 1 + prefix.matches('\n').count();
+            // Columns are 1-based and counted in characters since the most
+            // recent line break (or since the start of the text when there
+            // is none).
+            let column = 1 + prefix
+                .chars()
+                .rev()
+                .position(|c| c == '\n')
+                .unwrap_or(prefix.chars().count());
+            Position { offset: offset, line: line, column: column }
+        };
+        Span::new(position_at(range.start), position_at(range.end))
+    }
+
+    /// Build a verbatim literal for `c` that begins at offset `start` and
+    /// covers as many bytes as the UTF-8 encoding of `c` occupies.
+    fn lit(c: char, start: usize) -> Ast {
+        let end = start + c.len_utf8();
+        lit_with(c, span(start..end))
+    }
+
+    /// Build a punctuation literal for `c` with the given span.
+    fn punct_lit(c: char, span: Span) -> Ast {
+        let literal = ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Punctuation,
+            c: c,
+        };
+        Ast::Literal(literal)
+    }
+
+    /// Build a verbatim literal for `c` with the given span.
+    fn lit_with(c: char, span: Span) -> Ast {
+        let literal = ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Verbatim,
+            c: c,
+        };
+        Ast::Literal(literal)
+    }
+
+    /// Build a concatenation of `asts` covering the given single-line
+    /// offset range.
+    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        let covering = span(range);
+        concat_with(covering, asts)
+    }
+
+    /// Build a concatenation of `asts` with the given span.
+    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
+        let node = ast::Concat { span: span, asts: asts };
+        Ast::Concat(node)
+    }
+
+    /// Build an alternation of `asts` covering the given single-line
+    /// offset range.
+    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        let covering = span(range);
+        Ast::Alternation(ast::Alternation { span: covering, asts: asts })
+    }
+
+    /// Build a capturing group with the given capture index that wraps
+    /// `ast` and covers the given single-line offset range.
+    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
+        let covering = span(range);
+        let kind = ast::GroupKind::CaptureIndex(index);
+        Ast::Group(ast::Group {
+            span: covering,
+            kind: kind,
+            ast: Box::new(ast),
+        })
+    }
+
+    /// Build an `ast::SetFlags` node that sets (or clears) a single flag.
+    ///
+    /// `pat` must be the complete pattern string and `range` the byte
+    /// offsets that the flag set occupies within it, e.g., `0..4` for a
+    /// leading `(?x)`.
+    ///
+    /// When `negated` is true, the item list begins with a negation item
+    /// that is placed in front of the flag.
+    fn flag_set(
+        pat: &str,
+        range: Range<usize>,
+        flag: ast::Flag,
+        negated: bool,
+    ) -> Ast {
+        let (lo, hi) = (range.start, range.end);
+        // The flag itself is taken to be the second-to-last byte of the
+        // range.
+        let flag_item = ast::FlagsItem {
+            span: span_range(pat, (hi - 2)..(hi - 1)),
+            kind: ast::FlagsItemKind::Flag(flag),
+        };
+        let items = if negated {
+            // The negation covers whatever sits between the first two
+            // bytes of the range and the flag.
+            let negation = ast::FlagsItem {
+                span: span_range(pat, (lo + 2)..(hi - 2)),
+                kind: ast::FlagsItemKind::Negation,
+            };
+            vec![negation, flag_item]
+        } else {
+            vec![flag_item]
+        };
+        let whole = span_range(pat, range);
+        // The flags proper exclude the first two bytes and the last byte
+        // of the range.
+        let inner = span_range(pat, (lo + 2)..(hi - 1));
+        Ast::Flags(ast::SetFlags {
+            span: whole,
+            flags: ast::Flags { span: inner, items: items },
+        })
+    }
+
+    /// Checks how the configured nest limit interacts with each construct
+    /// that counts as a level of nesting: repetitions, groups,
+    /// concatenations, alternations and the pieces of bracketed classes.
+    /// For failures, the expected span is that of the construct that pushed
+    /// the depth past the limit.
+    #[test]
+    fn parse_nest_limit() {
+        // A nest limit of 0 still allows some types of regexes.
+        assert_eq!(
+            parser_nest_limit("", 0).parse(),
+            Ok(Ast::Empty(span(0..0))));
+        assert_eq!(
+            parser_nest_limit("a", 0).parse(),
+            Ok(lit('a', 0)));
+
+        // Test repetition operations, which require one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a+", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("a+", 1).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        // The repetition uses up the single permitted level, so the error
+        // points at the group inside of it (`(a)`).
+        assert_eq!(
+            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        // Likewise, the error points at the inner repetition (`a+`).
+        assert_eq!(
+            parser_nest_limit("a+*", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        assert_eq!(
+            parser_nest_limit("a+*", 2).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(2..3),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(Ast::Repetition(ast::Repetition {
+                    span: span(0..2),
+                    op: ast::RepetitionOp {
+                        span: span(1..2),
+                        kind: ast::RepetitionKind::OneOrMore,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 0)),
+                })),
+            })));
+
+        // Test concatenations. A concatenation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("ab", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("ab", 1).parse(),
+            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])));
+        // More items in the same concatenation do not add more nesting.
+        assert_eq!(
+            parser_nest_limit("abc", 1).parse(),
+            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])));
+
+        // Test alternations. An alternation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a|b", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("a|b", 1).parse(),
+            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])));
+        // More branches in the same alternation do not add more nesting.
+        assert_eq!(
+            parser_nest_limit("a|b|c", 1).parse(),
+            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])));
+
+        // Test character classes. Classes form their own mini-recursive
+        // syntax!
+        assert_eq!(
+            parser_nest_limit("[a]", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("[a]", 1).parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassSet::Item(
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..2),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'a',
+                    })
+                ),
+            }))));
+        // Two items form a union (`ab`), which is one level below the class.
+        assert_eq!(
+            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        // The nested class `[cd]` is the third level: class, union, class.
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(3..7),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            });
+        // ... and the union inside of it (`cd`) is the fourth.
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(3),
+            });
+        // A binary operation (`a--b`) is one level below the class.
+        assert_eq!(
+            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..5),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        // A union operand (`bc`) is one level below the binary operation.
+        assert_eq!(
+            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            });
+    }
+
+    /// Checks that `parse_with_comments` returns both the Ast (with the
+    /// comments left out of it) and the list of comments found in a
+    /// pattern that enables `x` mode.
+    #[test]
+    fn parse_comments() {
+        let pat = "(?x)
+# This is comment 1.
+foo # This is comment 2.
+  # This is comment 3.
+bar
+# This is comment 4.";
+        let astc = parser(pat).parse_with_comments().unwrap();
+        // Only the flag and the six literals show up in the Ast; the
+        // whitespace and the comments between them do not.
+        assert_eq!(
+            astc.ast,
+            concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('f', span_range(pat, 26..27)),
+                lit_with('o', span_range(pat, 27..28)),
+                lit_with('o', span_range(pat, 28..29)),
+                lit_with('b', span_range(pat, 74..75)),
+                lit_with('a', span_range(pat, 75..76)),
+                lit_with('r', span_range(pat, 76..77)),
+            ]));
+        // Each comment span starts at the `#`. The first three spans also
+        // cover the line break that ends the comment; the last comment runs
+        // to the end of the pattern. The comment text excludes both the `#`
+        // and the line break.
+        assert_eq!(astc.comments, vec![
+            ast::Comment {
+                span: span_range(pat, 5..26),
+                comment: s(" This is comment 1."),
+            },
+            ast::Comment {
+                span: span_range(pat, 30..51),
+                comment: s(" This is comment 2."),
+            },
+            ast::Comment {
+                span: span_range(pat, 53..74),
+                comment: s(" This is comment 3."),
+            },
+            ast::Comment {
+                span: span_range(pat, 78..98),
+                comment: s(" This is comment 4."),
+            },
+        ]);
+    }
+
+    /// Checks a couple of whole-pattern cases: a lone `]` is an ordinary
+    /// literal, and every escaped punctuation character parses to a
+    /// punctuation literal that spans both the backslash and the character.
+    #[test]
+    fn parse_holistic() {
+        let lone_bracket = parser("]").parse();
+        assert_eq!(lone_bracket, Ok(lit(']', 0)));
+
+        // The punctuation characters, in the same order in which they are
+        // escaped in the pattern below. Each escape occupies two bytes, so
+        // the i-th literal spans `2i..2i+2`.
+        let punctuation = [
+            '\\', '.', '+', '*', '?', '(', ')', '|', '[',
+            ']', '{', '}', '^', '$', '#', '&', '-', '~',
+        ];
+        let expected: Vec<Ast> = punctuation
+            .iter()
+            .enumerate()
+            .map(|(i, &c)| punct_lit(c, span(2 * i..2 * i + 2)))
+            .collect();
+        assert_eq!(
+            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
+            Ok(concat(0..36, expected)));
+    }
+
+    /// Checks the `x` (ignore whitespace) flag: turning it on and off
+    /// within a pattern, scoping it to a group, and the places in which
+    /// whitespace is skipped while it is enabled.
+    #[test]
+    fn parse_ignore_whitespace() {
+        // Test that basic whitespace insensitivity works.
+        let pat = "(?x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(nspan(npos(0, 1, 1), npos(7, 1, 8)), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+            ])));
+
+        // Test that we can toggle whitespace insensitivity.
+        let pat = "(?x)a b(?-x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(nspan(npos(0, 1, 1), npos(15, 1, 16)), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
+                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
+                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
+                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
+            ])));
+
+        // Test that nesting whitespace insensitive flags works.
+        // The flag only applies inside of the group, so the spaces outside
+        // of it remain literals.
+        let pat = "a (?x:a )a ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..11), vec![
+                lit_with('a', span_range(pat, 0..1)),
+                lit_with(' ', span_range(pat, 1..2)),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 2..9),
+                    kind: ast::GroupKind::NonCapturing(ast::Flags {
+                        span: span_range(pat, 4..5),
+                        items: vec![
+                            ast::FlagsItem {
+                                span: span_range(pat, 4..5),
+                                kind: ast::FlagsItemKind::Flag(
+                                    ast::Flag::IgnoreWhitespace),
+                            },
+                        ],
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
+                }),
+                lit_with('a', span_range(pat, 9..10)),
+                lit_with(' ', span_range(pat, 10..11)),
+            ])));
+
+        // Test that whitespace after an opening paren is insignificant.
+        let pat = "(?x)( ?P<foo> a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                        span: span_range(pat, 9..12),
+                        name: s("foo"),
+                        index: 1,
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
+                }),
+            ])));
+        let pat = "(?x)(  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::CaptureIndex(1),
+                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
+                }),
+            ])));
+        // With no flags between `?` and `:`, the flags span is empty and
+        // sits at the offset of the `:`.
+        let pat = "(?x)(  ?:  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::NonCapturing(ast::Flags {
+                        span: span_range(pat, 8..8),
+                        items: vec![],
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
+                }),
+            ])));
+        // Whitespace is also skipped around and inside the braces of a hex
+        // escape. The literal spans the whole escape, and 0x53 is `S`.
+        let pat = r"(?x)\x { 53 }";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Literal(ast::Literal {
+                    span: span(4..13),
+                    kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                    c: 'S',
+                }),
+            ])));
+
+        // Test that whitespace after an escape is OK.
+        let pat = r"(?x)\ ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Literal(ast::Literal {
+                    span: span_range(pat, 4..6),
+                    kind: ast::LiteralKind::Special(
+                        ast::SpecialLiteralKind::Space),
+                    c: ' ',
+                }),
+            ])));
+        // ... but only when `x` mode is enabled.
+        let pat = r"\ ";
+        assert_eq!(
+            parser(pat).parse().unwrap_err(),
+            TestError {
+                span: span_range(pat, 0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    /// Checks that line breaks in a pattern are parsed as literals and that
+    /// the line and column numbers of every position account for them.
+    #[test]
+    fn parse_newlines() {
+        let pat = ".\n.";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..3), vec![
+                Ast::Dot(span_range(pat, 0..1)),
+                lit_with('\n', span_range(pat, 1..2)),
+                Ast::Dot(span_range(pat, 2..3)),
+            ])));
+
+        // The positions below are spelled out by hand instead of being
+        // derived via `span_range`. Note that the span of a line break
+        // literal ends at column 1 of the following line.
+        let pat = "foobar\nbaz\nquux\n";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
+                lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
+                lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
+                lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
+                lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
+                lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
+                lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
+                lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
+                lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
+                lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
+                lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
+                lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
+                lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
+                lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
+            ])));
+    }
+
+    /// Checks the `*`, `+` and `?` repetition operators: greedy and lazy
+    /// forms, how an operator binds to the expression right before it, and
+    /// the error that is reported when there is nothing to repeat.
+    #[test]
+    fn parse_uncounted_repetition() {
+        assert_eq!(
+            parser(r"a*").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a+").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        // A trailing `?` makes the operator lazy and is included in the
+        // span of the operator.
+        assert_eq!(
+            parser(r"a??").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(1..3),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+        // NOTE(review): this assertion is identical to the `a?` case above;
+        // it looks redundant -- confirm whether another pattern was
+        // intended here.
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a?b").parse(),
+            Ok(concat(0..3, vec![
+                Ast::Repetition(ast::Repetition {
+                    span: span(0..2),
+                    op: ast::RepetitionOp {
+                        span: span(1..2),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 0)),
+                }),
+                lit('b', 2),
+            ])));
+        assert_eq!(
+            parser(r"a??b").parse(),
+            Ok(concat(0..4, vec![
+                Ast::Repetition(ast::Repetition {
+                    span: span(0..3),
+                    op: ast::RepetitionOp {
+                        span: span(1..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: false,
+                    ast: Box::new(lit('a', 0)),
+                }),
+                lit('b', 3),
+            ])));
+        // The operator only applies to the literal right before it, not to
+        // the whole concatenation.
+        assert_eq!(
+            parser(r"ab?").parse(),
+            Ok(concat(0..3, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..3),
+                    op: ast::RepetitionOp {
+                        span: span(2..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+            ])));
+        // ... unless a group is used, in which case the whole group is
+        // repeated.
+        assert_eq!(
+            parser(r"(ab)?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(4..5),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(group(0..4, 1, concat(1..3, vec![
+                    lit('a', 1),
+                    lit('b', 2),
+                ]))),
+            })));
+        // An empty first branch is permitted in an alternation.
+        assert_eq!(
+            parser(r"|a?").parse(),
+            Ok(alt(0..3, vec![
+                Ast::Empty(span(0..0)),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..3),
+                    op: ast::RepetitionOp {
+                        span: span(2..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 1)),
+                }),
+            ])));
+
+        // A repetition operator with nothing before it to repeat is an
+        // error. The error is reported as an empty span at the offset of
+        // the operator.
+        assert_eq!(
+            parser(r"*").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(*)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(?:?)").parse().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"+").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"?").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(?)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|*").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|+").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|?").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+    }
+
+    #[test] // Counted repetition `{m}`, `{m,}`, `{m,n}`: expected ASTs and errors.
+    fn parse_counted_repetition() {
+        assert_eq!( // `{5}` is RepetitionRange::Exactly(5); op span covers the braces
+            parser(r"a{5}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..4),
+                op: ast::RepetitionOp {
+                    span: span(1..4),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!( // `{5,}` is RepetitionRange::AtLeast(5)
+            parser(r"a{5,}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::AtLeast(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!( // `{5,9}` is RepetitionRange::Bounded(5, 9)
+            parser(r"a{5,9}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!( // a trailing `?` clears `greedy` and is included in the op span
+            parser(r"a{5}?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!( // only `b` is repeated; `a` stays a separate literal
+            parser(r"ab{5}").parse(),
+            Ok(concat(0..5, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..5),
+                    op: ast::RepetitionOp {
+                        span: span(2..5),
+                        kind: ast::RepetitionKind::Range(
+                            ast::RepetitionRange::Exactly(5)),
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+            ])));
+        assert_eq!( // a literal after the repetition starts at offset 5
+            parser(r"ab{5}c").parse(),
+            Ok(concat(0..6, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..5),
+                    op: ast::RepetitionOp {
+                        span: span(2..5),
+                        kind: ast::RepetitionKind::Range(
+                            ast::RepetitionRange::Exactly(5)),
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+                lit('c', 5),
+            ])));
+        // Spaces inside the braces still parse to the same range kinds.
+        assert_eq!(
+            parser(r"a{ 5 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a{ 5 , 9 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..10),
+                op: ast::RepetitionOp {
+                    span: span(1..10),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!( // parser_ignore_whitespace: a space may precede the lazy `?`
+            parser_ignore_whitespace(r"a{5,9} ?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..8),
+                op: ast::RepetitionOp {
+                    span: span(1..8),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+        // Error cases: each checks the reported span and ErrorKind.
+        assert_eq!(
+            parser(r"a{").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{}").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{a").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(2..12),
+                kind: ast::ErrorKind::DecimalInvalid,
+            });
+        assert_eq!(
+            parser(r"a{9").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{9,a").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{9,9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(4..14),
+                kind: ast::ErrorKind::DecimalInvalid,
+            });
+        assert_eq!(
+            parser(r"a{9,").parse().unwrap_err(),
+            TestError {
+                span: span(1..4),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{9,11").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!( // lower bound 2 exceeds upper bound 1
+            parser(r"a{2,1}").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountInvalid,
+            });
+        assert_eq!( // nothing before the `{5}` to repeat
+            parser(r"{5}").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!( // same, directly after a `|`
+            parser(r"|{5}").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+    }
+
+    #[test] // Alternation: plain, grouped, nested and empty branches; paren errors.
+    fn parse_alternate() {
+        assert_eq!( // two single-literal branches
+            parser(r"a|b").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..3),
+                asts: vec![lit('a', 0), lit('b', 2)],
+            })));
+        assert_eq!( // inside a group the alternation spans 1..4, without the parens
+            parser(r"(a|b)").parse(),
+            Ok(group(0..5, 1, Ast::Alternation(ast::Alternation {
+                span: span(1..4),
+                asts: vec![lit('a', 1), lit('b', 3)],
+            }))));
+        // Three or more branches are kept in one flat Alternation.
+        assert_eq!(
+            parser(r"a|b|c").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..5),
+                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
+            })));
+        assert_eq!( // multi-character branches become concats
+            parser(r"ax|by|cz").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..8),
+                asts: vec![
+                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
+                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
+                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
+                ],
+            })));
+        assert_eq!(
+            parser(r"(ax|by|cz)").parse(),
+            Ok(group(0..10, 1, Ast::Alternation(ast::Alternation {
+                span: span(1..9),
+                asts: vec![
+                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                    concat(4..6, vec![lit('b', 4), lit('y', 5)]),
+                    concat(7..9, vec![lit('c', 7), lit('z', 8)]),
+                ],
+            }))));
+        assert_eq!( // nested groups produce nested group/alt/concat nodes
+            parser(r"(ax|(by|(cz)))").parse(),
+            Ok(group(0..14, 1, alt(1..13, vec![
+                concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                group(4..13, 2, alt(5..12, vec![
+                    concat(5..7, vec![lit('b', 5), lit('y', 6)]),
+                    group(8..12, 3, concat(9..11, vec![
+                        lit('c', 9),
+                        lit('z', 10),
+                    ])),
+                ])),
+            ]))));
+        // An empty branch is represented by a zero-width Ast::Empty.
+        assert_eq!(
+            parser(r"|").parse(), Ok(alt(0..1, vec![
+                Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),
+            ])));
+        assert_eq!(
+            parser(r"||").parse(), Ok(alt(0..2, vec![
+                Ast::Empty(span(0..0)),
+                Ast::Empty(span(1..1)),
+                Ast::Empty(span(2..2)),
+            ])));
+        assert_eq!(
+            parser(r"a|").parse(), Ok(alt(0..2, vec![
+                lit('a', 0), Ast::Empty(span(2..2)),
+            ])));
+        assert_eq!(
+            parser(r"|a").parse(), Ok(alt(0..2, vec![
+                Ast::Empty(span(0..0)), lit('a', 1),
+            ])));
+        // The same empty-branch cases, wrapped in a group.
+        assert_eq!(
+            parser(r"(|)").parse(), Ok(group(0..3, 1, alt(1..2, vec![
+                Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),
+            ]))));
+        assert_eq!(
+            parser(r"(a|)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
+                lit('a', 1), Ast::Empty(span(3..3)),
+            ]))));
+        assert_eq!(
+            parser(r"(|a)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
+                Ast::Empty(span(1..1)), lit('a', 2),
+            ]))));
+        // Unbalanced parens: the error span is the offending paren itself.
+        assert_eq!(
+            parser(r"a|b)").parse().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+        assert_eq!(
+            parser(r"(a|b").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+    }
+
+    #[test] // Look-around openers must fail with UnsupportedLookAround.
+    fn parse_unsupported_lookaround() {
+        assert_eq!( // `(?=`: error span 0..3 covers the opener only
+            parser(r"(?=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!( // `(?!`: span 0..3
+            parser(r"(?!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!( // `(?<=`: span 0..4
+            parser(r"(?<=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!( // `(?<!`: span 0..4
+            parser(r"(?<!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+    }
+
+    #[test] // Groups: flag-only, capturing and non-capturing forms, plus errors.
+    fn parse_group() { // first three cases: `(?flags)` with no body is Ast::Flags
+        assert_eq!(parser("(?i)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..4), // the whole `(?i)`
+            flags: ast::Flags {
+                span: span(2..3), // just the flag letter
+                items: vec![ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                }],
+            },
+        })));
+        assert_eq!(parser("(?iU)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..5),
+            flags: ast::Flags {
+                span: span(2..4),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            },
+        })));
+        assert_eq!(parser("(?i-U)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..6),
+            flags: ast::Flags {
+                span: span(2..5),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Negation, // the `-`
+                    },
+                    ast::FlagsItem {
+                        span: span(4..5),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            },
+        })));
+        // Capture groups get indices 1, 2, ...; an empty body is Ast::Empty.
+        assert_eq!(parser("()").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..2),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(Ast::Empty(span(1..1))),
+        })));
+        assert_eq!(parser("(a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..3),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(lit('a', 1)),
+        })));
+        assert_eq!(parser("(())").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..4),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(Ast::Group(ast::Group {
+                span: span(1..3),
+                kind: ast::GroupKind::CaptureIndex(2),
+                ast: Box::new(Ast::Empty(span(2..2))),
+            })),
+        })));
+        // `(?:` carries an empty Flags value at the zero-width span 2..2.
+        assert_eq!(parser("(?:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..5),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..2),
+                items: vec![],
+            }),
+            ast: Box::new(lit('a', 3)),
+        })));
+        // Flags before `:` are stored on the non-capturing group itself.
+        assert_eq!(parser("(?i:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..6),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..3),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                ],
+            }),
+            ast: Box::new(lit('a', 4)),
+        })));
+        assert_eq!(parser("(?i-U:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..8),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..5),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Negation,
+                    },
+                    ast::FlagsItem {
+                        span: span(4..5),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            }),
+            ast: Box::new(lit('a', 6)),
+        })));
+        // Errors: GroupUnclosed spans a `(`, GroupUnopened spans the `)`.
+        assert_eq!(
+            parser("(").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser("(?").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!( // `P` with no `<` after it is reported as an unrecognized flag
+            parser("(?P").parse().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!( // input ends right after `<`, where the name should start
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(a").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!( // the outer `(` at 0..1 is the one reported
+            parser("(()").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser(")").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+        assert_eq!(
+            parser("a)").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+    }
+
+    #[test] // Named groups `(?P<name>...)`: name span, index, and name errors.
+    fn parse_capture_name() {
+        assert_eq!(parser("(?P<a>z)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..8),
+            kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                span: span(4..5), // the name only, without `<` and `>`
+                name: s("a"),
+                index: 1,
+            }),
+            ast: Box::new(lit('z', 6)),
+        })));
+        assert_eq!(parser("(?P<abc>z)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..10),
+            kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                span: span(4..7),
+                name: s("abc"),
+                index: 1,
+            }),
+            ast: Box::new(lit('z', 8)),
+        })));
+        // Error cases: truncated, empty, invalid and duplicate names.
+        assert_eq!(
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(?P<>z)").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameEmpty,
+            });
+        assert_eq!( // input ends before `>`; the span is the end of input
+            parser("(?P<a").parse().unwrap_err(),
+            TestError {
+                span: span(5..5),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(?P<ab").parse().unwrap_err(),
+            TestError {
+                span: span(6..6),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!( // a name starting with the digit `0` is GroupNameInvalid
+            parser("(?P<0a").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!(
+            parser("(?P<~").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!( // the error span is the bad character `~` at 7..8
+            parser("(?P<abc~").parse().unwrap_err(),
+            TestError {
+                span: span(7..8),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!( // second use of the name `a`
+            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
+            TestError {
+                span: span(12..13),
+                kind: ast::ErrorKind::GroupNameDuplicate {
+                    original: span(4..5), // position of the first `a`
+                },
+            });
+    }
+
+    #[test] // parse_flags: flag lists ended by `:` or `)`, negation, and errors.
+    fn parse_flags() {
+        assert_eq!(parser("i:").parse_flags(), Ok(ast::Flags {
+            span: span(0..1), // the terminating `:` is not part of the span
+            items: vec![ast::FlagsItem {
+                span: span(0..1),
+                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+            }],
+        }));
+        assert_eq!(parser("i)").parse_flags(), Ok(ast::Flags {
+            span: span(0..1), // same result when `)` ends the list
+            items: vec![ast::FlagsItem {
+                span: span(0..1),
+                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+            }],
+        }));
+        // Several flags: one FlagsItem per character, in input order.
+        assert_eq!(parser("isU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..3),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+        // `-` is recorded as its own Negation item at the position it appears.
+        assert_eq!(parser("-isU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..4),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Negation,
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(3..4),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+        assert_eq!(parser("i-sU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..4),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Negation,
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(3..4),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+        // Error cases.
+        assert_eq!( // input ends with no `:` or `)`
+            parser("isU").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::FlagUnexpectedEof,
+            });
+        assert_eq!( // `a` is not a flag letter
+            parser("isUa:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!( // `i` given twice; `original` is the first occurrence
+            parser("isUi:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagDuplicate {
+                    original: span(0..1),
+                },
+            });
+        assert_eq!( // a second `-`; `original` is the first one
+            parser("i-sU-i:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::FlagRepeatedNegation {
+                    original: span(1..2),
+                },
+            });
+        assert_eq!( // `-` directly before `)`, with no flag after it
+            parser("-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+        assert_eq!(
+            parser("i-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+        assert_eq!(
+            parser("iU-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+    }
+
+    #[test] // parse_flag: each single flag letter, then unrecognized input.
+    fn parse_flag() {
+        assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
+        assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
+        assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
+        assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
+        assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
+        assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
+        // Other characters are FlagUnrecognized; the span is that character.
+        assert_eq!(
+            parser("a").parse_flag().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!(
+            parser("☃").parse_flag().unwrap_err(),
+            TestError {
+                span: span_range("☃", 0..3), // "☃" is 3 bytes in UTF-8
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+    }
+
+    #[test] // parse_primitive on unescaped input: `.`, `^`, `$`, then literals.
+    fn parse_primitive_non_escape() {
+        assert_eq!(
+            parser(r".").parse_primitive(),
+            Ok(Primitive::Dot(span(0..1))));
+        assert_eq!( // `^` is the StartLine assertion
+            parser(r"^").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::StartLine,
+            })));
+        assert_eq!( // `$` is the EndLine assertion
+            parser(r"$").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::EndLine,
+            })));
+        // The rest are Verbatim literals, including a lone `|`.
+        assert_eq!(
+            parser(r"a").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: 'a',
+            })));
+        assert_eq!(
+            parser(r"|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: '|',
+            })));
+        assert_eq!(
+            parser(r"☃").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span_range("☃", 0..3), // "☃" is 3 bytes in UTF-8
+                kind: ast::LiteralKind::Verbatim,
+                c: '☃',
+            })));
+    }
+
+    #[test] // Escapes: punctuation, special literals, assertions, bad escapes.
+    fn parse_escape() {
+        assert_eq!( // an escaped `|` is a Punctuation literal spanning both characters
+            parser(r"\|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..2),
+                kind: ast::LiteralKind::Punctuation,
+                c: '|',
+            })));
+        let specials = &[ // (pattern, expected char, expected SpecialLiteralKind)
+            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
+            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
+            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
+            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
+            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
+            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
+        ];
+        for &(pat, c, ref kind) in specials {
+            assert_eq!(
+                parser(pat).parse_primitive(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..2),
+                    kind: ast::LiteralKind::Special(kind.clone()),
+                    c: c,
+                })));
+        }
+        assert_eq!( // \A, \z, \b and \B parse as assertions rather than literals
+            parser(r"\A").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::StartText,
+            })));
+        assert_eq!(
+            parser(r"\z").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::EndText,
+            })));
+        assert_eq!(
+            parser(r"\b").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::WordBoundary,
+            })));
+        assert_eq!(
+            parser(r"\B").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::NotWordBoundary,
+            })));
+        // Error cases (through parse_escape): lone backslash, unknown escape.
+        assert_eq!(
+            parser(r"\").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\y").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    #[test] // With plain `parser`, \0 and \9 must fail as UnsupportedBackreference.
+    fn parse_unsupported_backreference() {
+        assert_eq!( // span 0..2 covers the backslash and the digit
+            parser(r"\0").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            });
+        assert_eq!(
+            parser(r"\9").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            });
+    }
+
+    #[test] // Octal escapes via parser_octal: \0 through \777, then cut-off rules.
+    fn parse_octal() {
+        for i in 0..512 { // end-exclusive range: 512 so that 0o777 (511) is tested
+            let pat = format!(r"\{:o}", i);
+            assert_eq!(
+                parser_octal(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::Octal,
+                    c: ::std::char::from_u32(i).unwrap(),
+                })));
+        }
+        assert_eq!( // `8` is not an octal digit, so only \77 ('?') is consumed
+            parser_octal(r"\778").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..3),
+                kind: ast::LiteralKind::Octal,
+                c: '?',
+            })));
+        assert_eq!( // only three octal digits are consumed; the fourth `7` is left
+            parser_octal(r"\7777").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..4),
+                kind: ast::LiteralKind::Octal,
+                c: '\u{01FF}',
+            })));
+        assert_eq!( // through parse(): the leftover `8` becomes a Verbatim literal
+            parser_octal(r"\778").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..3),
+                        kind: ast::LiteralKind::Octal,
+                        c: '?',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '8',
+                    }),
+                ],
+            })));
+        assert_eq!( // through parse(): the leftover `7` becomes a Verbatim literal
+            parser_octal(r"\7777").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..5),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..4),
+                        kind: ast::LiteralKind::Octal,
+                        c: '\u{01FF}',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(4..5),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '7',
+                    }),
+                ],
+            })));
+        // \8 does not start an octal escape: EscapeUnrecognized.
+        assert_eq!(
+            parser_octal(r"\8").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    #[test] // Two-digit hex escapes `\xNN`: every value 0..=255, then bad input.
+    fn parse_hex_two() {
+        for i in 0..256 { // \x00 through \xff
+            let pat = format!(r"\x{:02x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
+                    c: ::std::char::from_u32(i).unwrap(),
+                })));
+        }
+        // Error cases: too few digits, or a character that is not a hex digit.
+        assert_eq!( // one digit, then end of input (zero-width span at 3)
+            parser(r"\xF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!( // `G` as the first digit
+            parser(r"\xG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!( // `G` as the second digit
+            parser(r"\xFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+    }
+
+    /// Tests `\uNNNN`: exactly four hex digits after `\u` name the literal.
+    #[test]
+    fn parse_hex_four() {
+        // Values that `char::from_u32` rejects have no `char` to expect, so
+        // this loop skips them.
+        for code in 0..65536 {
+            if let Some(expected) = ::std::char::from_u32(code) {
+                let pattern = format!(r"\u{:04x}", code);
+                let parsed = parser(&pattern).parse_escape();
+                assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pattern.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeShort),
+                    c: expected,
+                })));
+            }
+        }
+
+        // The pattern ends after a single digit.
+        let err = parser(r"\uF").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..3),
+            kind: ast::ErrorKind::EscapeUnexpectedEof,
+        });
+        // A non-hex character in the first digit position.
+        let err = parser(r"\uG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..3),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the second digit position.
+        let err = parser(r"\uFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..4),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the third digit position.
+        let err = parser(r"\uFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(4..5),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the fourth digit position.
+        let err = parser(r"\uFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(5..6),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // Four hex digits naming a surrogate; the error spans the digits.
+        let err = parser(r"\uD800").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..6),
+            kind: ast::ErrorKind::EscapeHexInvalid,
+        });
+    }
+
+    /// Tests `\UNNNNNNNN`: exactly eight hex digits after `\U` name the literal.
+    #[test]
+    fn parse_hex_eight() {
+        // Only values below 65536 are tried, zero-padded to eight digits;
+        // those that `char::from_u32` rejects are skipped.
+        for code in 0..65536 {
+            if let Some(expected) = ::std::char::from_u32(code) {
+                let pattern = format!(r"\U{:08x}", code);
+                let parsed = parser(&pattern).parse_escape();
+                assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pattern.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeLong),
+                    c: expected,
+                })));
+            }
+        }
+
+        // The pattern ends after a single digit.
+        let err = parser(r"\UF").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..3),
+            kind: ast::ErrorKind::EscapeUnexpectedEof,
+        });
+        // A non-hex character in the first digit position.
+        let err = parser(r"\UG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..3),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the second digit position.
+        let err = parser(r"\UFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..4),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the third digit position.
+        let err = parser(r"\UFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(4..5),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the fourth digit position.
+        let err = parser(r"\UFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(5..6),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the fifth digit position.
+        let err = parser(r"\UFFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(6..7),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the sixth digit position.
+        let err = parser(r"\UFFFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(7..8),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the seventh digit position.
+        let err = parser(r"\UFFFFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(8..9),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // A non-hex character in the eighth digit position.
+        let err = parser(r"\UFFFFFFFG").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(9..10),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+    }
+
+    /// Tests braced hex escapes: `\x{...}`, `\u{...}` and `\U{...}`.
+    #[test]
+    fn parse_hex_brace() {
+        // The letter before the brace decides the hex literal kind.
+        let parsed = parser(r"\u{26c4}").parse_escape();
+        assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+            span: span(0..8),
+            kind: ast::LiteralKind::HexBrace(
+                ast::HexLiteralKind::UnicodeShort),
+            c: '⛄',
+        })));
+        // Same digits behind an upper-case `U`.
+        let parsed = parser(r"\U{26c4}").parse_escape();
+        assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+            span: span(0..8),
+            kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::UnicodeLong),
+            c: '⛄',
+        })));
+        // Same digits behind `x`.
+        let parsed = parser(r"\x{26c4}").parse_escape();
+        assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+            span: span(0..8),
+            kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+            c: '⛄',
+        })));
+        // Upper-case hex digits give the same character.
+        let parsed = parser(r"\x{26C4}").parse_escape();
+        assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+            span: span(0..8),
+            kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+            c: '⛄',
+        })));
+        // Mixed-case digits spelling the code point 10FFFF.
+        let parsed = parser(r"\x{10fFfF}").parse_escape();
+        assert_eq!(parsed, Ok(Primitive::Literal(ast::Literal {
+            span: span(0..10),
+            kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+            c: '\u{10FFFF}',
+        })));
+
+        // Nothing follows `\x`.
+        let err = parser(r"\x").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..2),
+            kind: ast::ErrorKind::EscapeUnexpectedEof,
+        });
+        // The input ends right after the opening brace.
+        let err = parser(r"\x{").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..3),
+            kind: ast::ErrorKind::EscapeUnexpectedEof,
+        });
+        // The input ends before a closing brace.
+        let err = parser(r"\x{FF").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..5),
+            kind: ast::ErrorKind::EscapeUnexpectedEof,
+        });
+        // Braces with no digits between them.
+        let err = parser(r"\x{}").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(2..4),
+            kind: ast::ErrorKind::EscapeHexEmpty,
+        });
+        // A non-hex character between the braces.
+        let err = parser(r"\x{FGF}").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(4..5),
+            kind: ast::ErrorKind::EscapeHexInvalidDigit,
+        });
+        // Digits whose value lies above the largest code point.
+        let err = parser(r"\x{FFFFFF}").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..9),
+            kind: ast::ErrorKind::EscapeHexInvalid,
+        });
+        // Digits naming a surrogate, which is not a valid `char`.
+        let err = parser(r"\x{D800}").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..7),
+            kind: ast::ErrorKind::EscapeHexInvalid,
+        });
+        // Nine digits, a value too large for 32 bits.
+        let err = parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..12),
+            kind: ast::ErrorKind::EscapeHexInvalid,
+        });
+    }
+
+    /// Tests `parse_decimal` on digit runs it accepts and on inputs that it
+    /// reports as empty or invalid.
+    #[test]
+    fn parse_decimal() {
+        // A leading zero is accepted and does not change the value.
+        for &(text, value) in [("123", 123), ("0", 0), ("01", 1)].iter() {
+            assert_eq!(parser(text).parse_decimal(), Ok(value));
+        }
+
+        // A leading `-` and an empty input are both reported as an empty
+        // decimal at offset 0.
+        for &text in ["-1", ""].iter() {
+            let err = parser(text).parse_decimal().unwrap_err();
+            assert_eq!(err, TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        }
+        // Ten nines are reported as invalid, with a span over all the digits.
+        let err = parser("9999999999").parse_decimal().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(0..10),
+            kind: ast::ErrorKind::DecimalInvalid,
+        });
+    }
+
+    /// Tests bracketed character classes: items, ranges, nesting and the
+    /// `&&`, `--` and `~~` set operators, followed by the errors reported
+    /// for unclosed classes, invalid escapes and invalid ranges.
+    #[test]
+    fn parse_set_class() {
+        // Wraps a bracketed class in the `Ast` node that `parse` returns.
+        fn bracketed(cls: ast::ClassBracketed) -> Ast {
+            Ast::Class(ast::Class::Bracketed(cls))
+        }
+
+        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
+            let members = ast::ClassSetUnion { span: span, items: items };
+            ast::ClassSet::union(members)
+        }
+
+        // Shared constructor behind the three binary operator helpers below.
+        fn binop(
+            kind: ast::ClassSetBinaryOpKind,
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: kind,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn intersection(
+            span: Span, lhs: ast::ClassSet, rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            let kind = ast::ClassSetBinaryOpKind::Intersection;
+            binop(kind, span, lhs, rhs)
+        }
+
+        fn difference(
+            span: Span, lhs: ast::ClassSet, rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            let kind = ast::ClassSetBinaryOpKind::Difference;
+            binop(kind, span, lhs, rhs)
+        }
+
+        fn symdifference(
+            span: Span, lhs: ast::ClassSet, rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            let kind = ast::ClassSetBinaryOpKind::SymmetricDifference;
+            binop(kind, span, lhs, rhs)
+        }
+
+        // Thin wrappers that lift a node into the enclosing AST type.
+        fn itemset(inner: ast::ClassSetItem) -> ast::ClassSet {
+            ast::ClassSet::Item(inner)
+        }
+        fn item_ascii(inner: ast::ClassAscii) -> ast::ClassSetItem {
+            ast::ClassSetItem::Ascii(inner)
+        }
+        fn item_unicode(inner: ast::ClassUnicode) -> ast::ClassSetItem {
+            ast::ClassSetItem::Unicode(inner)
+        }
+        fn item_perl(inner: ast::ClassPerl) -> ast::ClassSetItem {
+            ast::ClassSetItem::Perl(inner)
+        }
+        fn item_bracket(inner: ast::ClassBracketed) -> ast::ClassSetItem {
+            ast::ClassSetItem::Bracketed(Box::new(inner))
+        }
+
+        fn lit(span: Span, c: char) -> ast::ClassSetItem {
+            let verbatim = ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Verbatim,
+                c: c,
+            };
+            ast::ClassSetItem::Literal(verbatim)
+        }
+
+        fn empty(span: Span) -> ast::ClassSetItem {
+            ast::ClassSetItem::Empty(span)
+        }
+
+        // Builds `start-end` from the span of the whole range: each endpoint
+        // literal covers one character (one column) at its end of that span.
+        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
+            let start_end = Position {
+                offset: span.start.offset + start.len_utf8(),
+                column: span.start.column + 1,
+                ..span.start
+            };
+            let end_start = Position {
+                offset: span.end.offset - end.len_utf8(),
+                column: span.end.column - 1,
+                ..span.end
+            };
+            let first = ast::Literal {
+                span: Span { end: start_end, ..span },
+                kind: ast::LiteralKind::Verbatim,
+                c: start,
+            };
+            let last = ast::Literal {
+                span: Span { start: end_start, ..span },
+                kind: ast::LiteralKind::Verbatim,
+                c: end,
+            };
+            ast::ClassSetItem::Range(ast::ClassSetRange {
+                span: span,
+                start: first,
+                end: last,
+            })
+        }
+
+        // ASCII classes used as operands in the assertions below.
+        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
+            let kind = ast::ClassAsciiKind::Alnum;
+            ast::ClassAscii { span: span, kind: kind, negated: negated }
+        }
+
+        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
+            let kind = ast::ClassAsciiKind::Lower;
+            ast::ClassAscii { span: span, kind: kind, negated: negated }
+        }
+
+        // A lone ASCII class is stored as the set's single item.
+        let parsed = parser("[[:alnum:]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..11),
+            negated: false,
+            kind: itemset(item_ascii(alnum(span(1..10), false))),
+        })));
+        // A bracketed class nested directly inside another one.
+        let parsed = parser("[[[:alnum:]]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..13),
+            negated: false,
+            kind: itemset(item_bracket(ast::ClassBracketed {
+                span: span(1..12),
+                negated: false,
+                kind: itemset(item_ascii(alnum(span(2..11), false))),
+            })),
+        })));
+        // `&&` yields an intersection of the classes on either side.
+        let parsed = parser("[[:alnum:]&&[:lower:]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..22),
+            negated: false,
+            kind: intersection(
+                span(1..21),
+                itemset(item_ascii(alnum(span(1..10), false))),
+                itemset(item_ascii(lower(span(12..21), false))),
+            ),
+        })));
+        // `--` yields a difference of the classes on either side.
+        let parsed = parser("[[:alnum:]--[:lower:]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..22),
+            negated: false,
+            kind: difference(
+                span(1..21),
+                itemset(item_ascii(alnum(span(1..10), false))),
+                itemset(item_ascii(lower(span(12..21), false))),
+            ),
+        })));
+        // `~~` yields a symmetric difference of the classes on either side.
+        let parsed = parser("[[:alnum:]~~[:lower:]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..22),
+            negated: false,
+            kind: symdifference(
+                span(1..21),
+                itemset(item_ascii(alnum(span(1..10), false))),
+                itemset(item_ascii(lower(span(12..21), false))),
+            ),
+        })));
+
+        // A single literal is stored as a bare item, not as a union.
+        let parsed = parser("[a]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..3),
+            negated: false,
+            kind: itemset(lit(span(1..2), 'a')),
+        })));
+        // An escaped `]` is a punctuation literal and does not end the class.
+        let parsed = parser(r"[a\]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..5),
+            negated: false,
+            kind: union(span(1..4), vec![
+                lit(span(1..2), 'a'),
+                ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(2..4),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: ']',
+                }),
+            ]),
+        })));
+        // An escaped `-` is a punctuation literal, so no range is formed.
+        let parsed = parser(r"[a\-z]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..6),
+            negated: false,
+            kind: union(span(1..5), vec![
+                lit(span(1..2), 'a'),
+                ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(2..4),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: '-',
+                }),
+                lit(span(4..5), 'z'),
+            ]),
+        })));
+        // Adjacent literals become the items of a union.
+        let parsed = parser("[ab]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..4),
+            negated: false,
+            kind: union(span(1..3), vec![
+                lit(span(1..2), 'a'),
+                lit(span(2..3), 'b'),
+            ]),
+        })));
+        // A trailing `-` is a verbatim literal.
+        let parsed = parser("[a-]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..4),
+            negated: false,
+            kind: union(span(1..3), vec![
+                lit(span(1..2), 'a'),
+                lit(span(2..3), '-'),
+            ]),
+        })));
+        // A leading `-` is a verbatim literal.
+        let parsed = parser("[-a]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..4),
+            negated: false,
+            kind: union(span(1..3), vec![
+                lit(span(1..2), '-'),
+                lit(span(2..3), 'a'),
+            ]),
+        })));
+        // A one-letter Unicode class as the single item.
+        let parsed = parser(r"[\pL]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..5),
+            negated: false,
+            kind: itemset(item_unicode(ast::ClassUnicode {
+                span: span(1..4),
+                negated: false,
+                kind: ast::ClassUnicodeKind::OneLetter('L'),
+            })),
+        })));
+        // A Perl class as the single item.
+        let parsed = parser(r"[\w]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..4),
+            negated: false,
+            kind: itemset(item_perl(ast::ClassPerl {
+                span: span(1..3),
+                kind: ast::ClassPerlKind::Word,
+                negated: false,
+            })),
+        })));
+        // A Perl class between two literals.
+        let parsed = parser(r"[a\wz]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..6),
+            negated: false,
+            kind: union(span(1..5), vec![
+                lit(span(1..2), 'a'),
+                item_perl(ast::ClassPerl {
+                    span: span(2..4),
+                    kind: ast::ClassPerlKind::Word,
+                    negated: false,
+                }),
+                lit(span(4..5), 'z'),
+            ]),
+        })));
+
+        // A single range is stored as a bare item.
+        let parsed = parser("[a-z]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..5),
+            negated: false,
+            kind: itemset(range(span(1..4), 'a', 'z')),
+        })));
+        // Two ranges written back to back.
+        let parsed = parser("[a-cx-z]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..8),
+            negated: false,
+            kind: union(span(1..7), vec![
+                range(span(1..4), 'a', 'c'),
+                range(span(4..7), 'x', 'z'),
+            ]),
+        })));
+        // The right operand of `&&` is the union of everything after it.
+        let parsed = parser(r"[\w&&a-cx-z]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..12),
+            negated: false,
+            kind: intersection(
+                span(1..11),
+                itemset(item_perl(ast::ClassPerl {
+                    span: span(1..3),
+                    kind: ast::ClassPerlKind::Word,
+                    negated: false,
+                })),
+                union(span(5..11), vec![
+                    range(span(5..8), 'a', 'c'),
+                    range(span(8..11), 'x', 'z'),
+                ]),
+            ),
+        })));
+        // The left operand of `&&` is the union of everything before it.
+        let parsed = parser(r"[a-cx-z&&\w]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..12),
+            negated: false,
+            kind: intersection(
+                span(1..11),
+                union(span(1..7), vec![
+                    range(span(1..4), 'a', 'c'),
+                    range(span(4..7), 'x', 'z'),
+                ]),
+                itemset(item_perl(ast::ClassPerl {
+                    span: span(9..11),
+                    kind: ast::ClassPerlKind::Word,
+                    negated: false,
+                })),
+            ),
+        })));
+        // Repeated `--` nests to the left: (a -- b) -- c.
+        let parsed = parser(r"[a--b--c]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..9),
+            negated: false,
+            kind: difference(
+                span(1..8),
+                difference(
+                    span(1..5),
+                    itemset(lit(span(1..2), 'a')),
+                    itemset(lit(span(4..5), 'b')),
+                ),
+                itemset(lit(span(7..8), 'c')),
+            ),
+        })));
+        // Repeated `~~` nests to the left: (a ~~ b) ~~ c.
+        let parsed = parser(r"[a~~b~~c]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..9),
+            negated: false,
+            kind: symdifference(
+                span(1..8),
+                symdifference(
+                    span(1..5),
+                    itemset(lit(span(1..2), 'a')),
+                    itemset(lit(span(4..5), 'b')),
+                ),
+                itemset(lit(span(7..8), 'c')),
+            ),
+        })));
+        // An escaped `^` and a bare, non-leading `^` are both literals.
+        let parsed = parser(r"[\^&&^]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..7),
+            negated: false,
+            kind: intersection(
+                span(1..6),
+                itemset(ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..3),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: '^',
+                })),
+                itemset(lit(span(5..6), '^')),
+            ),
+        })));
+        // After an escaped `&`, `&&` is the operator and the last `&` a literal.
+        let parsed = parser(r"[\&&&&]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..7),
+            negated: false,
+            kind: intersection(
+                span(1..6),
+                itemset(ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..3),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: '&',
+                })),
+                itemset(lit(span(5..6), '&')),
+            ),
+        })));
+        // Two `&&` operators whose operands are all empty items.
+        let parsed = parser(r"[&&&&]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..6),
+            negated: false,
+            kind: intersection(
+                span(1..5),
+                intersection(
+                    span(1..3),
+                    itemset(empty(span(1..1))),
+                    itemset(empty(span(3..3))),
+                ),
+                itemset(empty(span(5..5))),
+            ),
+        })));
+
+        // A range between three-byte characters; the offsets count bytes.
+        let pattern = "[☃-⛄]";
+        let parsed = parser(pattern).parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span_range(pattern, 0..9),
+            negated: false,
+            kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
+                span: span_range(pattern, 1..8),
+                start: ast::Literal {
+                    span: span_range(pattern, 1..4),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: '☃',
+                },
+                end: ast::Literal {
+                    span: span_range(pattern, 5..8),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: '⛄',
+                },
+            })),
+        })));
+
+        // A `]` right after the opening bracket is a literal.
+        let parsed = parser(r"[]]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..3),
+            negated: false,
+            kind: itemset(lit(span(1..2), ']')),
+        })));
+        // A literal leading `]` followed by an escaped `[`.
+        let parsed = parser(r"[]\[]").parse();
+        assert_eq!(parsed, Ok(bracketed(ast::ClassBracketed {
+            span: span(0..5),
+            negated: false,
+            kind: union(span(1..4), vec![
+                lit(span(1..2), ']'),
+                ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(2..4),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: '[',
+                }),
+            ]),
+        })));
+        // The first unescaped `]` ends the class; the next one is a literal.
+        let parsed = parser(r"[\[]]").parse();
+        assert_eq!(parsed, Ok(concat(0..5, vec![
+            bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: itemset(ast::ClassSetItem::Literal(ast::Literal {
+                    span: span(1..3),
+                    kind: ast::LiteralKind::Punctuation,
+                    c: '[',
+                })),
+            }),
+            Ast::Literal(ast::Literal {
+                span: span(4..5),
+                kind: ast::LiteralKind::Verbatim,
+                c: ']',
+            }),
+        ])));
+
+        // An unclosed class is reported at its opening bracket.
+        let err = parser("[").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(0..1),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+        // With two open brackets, the inner one is reported.
+        let err = parser("[[").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(1..2),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+        // The error points at the outer bracket at offset 0.
+        let err = parser("[[-]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(0..1),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+        // The error points at the bracket at offset 1.
+        let err = parser("[[[:alnum:]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(1..2),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+        // `\b` is rejected inside a class.
+        let err = parser(r"[\b]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(1..3),
+            kind: ast::ErrorKind::ClassEscapeInvalid,
+        });
+        // A Perl class is rejected as the start of a range.
+        let err = parser(r"[\w-a]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(1..3),
+            kind: ast::ErrorKind::ClassEscapeInvalid,
+        });
+        // A Perl class is rejected as the end of a range.
+        let err = parser(r"[a-\w]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(3..5),
+            kind: ast::ErrorKind::ClassEscapeInvalid,
+        });
+        // A range whose start sorts after its end.
+        let err = parser(r"[z-a]").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(1..4),
+            kind: ast::ErrorKind::ClassRangeInvalid,
+        });
+
+        // Via `parser_ignore_whitespace`: trailing space, class still unclosed.
+        let err = parser_ignore_whitespace("[a ").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(0..1),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+        // Same, with a dangling `-` before the trailing space.
+        let err = parser_ignore_whitespace("[a- ").parse().unwrap_err();
+        assert_eq!(err, TestError {
+            span: span(0..1),
+            kind: ast::ErrorKind::ClassUnclosed,
+        });
+    }
+
+    #[test]
+    fn parse_set_class_open() {
+        assert_eq!(
+            parser("[a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..1),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[   a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(4..4),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..2),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(4..4),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[-a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ - a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^-a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[--a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ ] a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[-]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+
+        assert_eq!(
+            parser("[").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[    ")
+            .parse_set_class_open()
+            .unwrap_err(),
+            TestError {
+                span: span(0..5),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[^").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[]").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[-").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[--").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+    }
+
+    #[test]
+    fn maybe_parse_ascii_class() { // `[:name:]` and `[:^name:]` ASCII classes
+        assert_eq!(
+            parser(r"[:alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            }));
+        assert_eq!(
+            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9), // the trailing `A` is not part of the class
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            }));
+        assert_eq!(
+            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..10),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: true,
+            }));
+        // Anything malformed yields `None` and leaves the offset at 0.
+        let p = parser(r"[:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+        // Negation marker but no class name.
+        let p = parser(r"[:^");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+        // `^` placed in front of the first `:`.
+        let p = parser(r"[^:alnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+        // Unrecognized class name.
+        let p = parser(r"[:alnnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+        // Closing `:` is missing.
+        let p = parser(r"[:alnum]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+        // Closing `]` is missing.
+        let p = parser(r"[:alnum:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+    }
+
+    #[test]
+    fn parse_unicode_class() { // `\p` / `\P` escapes in each accepted spelling
+        assert_eq!(
+            parser(r"\pN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            })));
+        assert_eq!(
+            parser(r"\PN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: true,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            })));
+        assert_eq!(
+            parser(r"\p{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            })));
+        assert_eq!(
+            parser(r"\P{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: true,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            })));
+        assert_eq!(
+            parser(r"\p{Greek}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..9),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("Greek")),
+            })));
+        // `name<op>value` forms, one assertion per operator.
+        assert_eq!(
+            parser(r"\p{scx:Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{scx=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{scx!=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..17),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+        // An empty name and an empty value are accepted by the parser.
+        assert_eq!(
+            parser(r"\p{:}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{!=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..6),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+        // Truncated input: the error span is empty and sits at end of input.
+        assert_eq!(
+            parser(r"\p").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{N").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{Greek").parse_escape().unwrap_err(),
+            TestError {
+                span: span(8..8),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        // Full parses: a class followed by a literal yields a concatenation.
+        assert_eq!(
+            parser(r"\pNz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..3),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::OneLetter('N'),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+        assert_eq!(
+            parser(r"\p{Greek}z").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..10),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..9),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(9..10),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+    }
+
+    #[test]
+    fn parse_perl_class() { // `\d`, `\s`, `\w` and their upper-case negations
+        assert_eq!(
+            parser(r"\d").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\D").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: true,
+            })));
+        assert_eq!(
+            parser(r"\s").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\S").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: true,
+            })));
+        assert_eq!(
+            parser(r"\w").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\W").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: true,
+            })));
+        // Going through `parse()` wraps the class in `Ast::Class`.
+        assert_eq!(
+            parser(r"\d").parse(),
+            Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            }))));
+        assert_eq!(
+            parser(r"\dz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..3),
+                asts: vec![
+                    Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                        span: span(0..2),
+                        kind: ast::ClassPerlKind::Digit,
+                        negated: false,
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+    }
+}
diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs
new file mode 100644
index 0000000000..0d6dfb0a20
--- /dev/null
+++ b/regex-syntax/src/ast/print.rs
@@ -0,0 +1,591 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This module provides a regular expression printer.
+*/
+
+use std::fmt;
+
+use ast::{self, Ast};
+use ast::visitor::{self, Visitor};
+
+/// A builder for constructing a `Printer`.
+///
+/// A printer currently has no configuration knobs, so this type remains
+/// unexported. `_priv` is a zero-sized placeholder field.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder { _priv: () } // same value `PrinterBuilder::new()` yields
+    }
+}
+
+impl PrinterBuilder {
+    /// Create a new builder; there are no options to configure.
+    fn new() -> PrinterBuilder {
+        PrinterBuilder { _priv: () }
+    }
+
+    /// Build a `Printer` from this builder.
+    ///
+    /// The builder is only borrowed, so it can be reused afterwards.
+    fn build(&self) -> Printer {
+        Printer { _priv: () }
+    }
+}
+
+/// A printer for a regular expression abstract syntax tree.
+///
+/// A printer converts an abstract syntax tree (AST) back into a regular
+/// expression pattern string. This particular printer uses constant stack
+/// space and heap space proportional to the size of the AST.
+///
+/// The printer does not necessarily preserve the original formatting of the
+/// pattern string. For example, insignificant whitespace and comments from
+/// the original pattern are not reproduced in the output.
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a printer with the default (and only) configuration.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Write the pattern string for `ast` to `wtr`.
+    ///
+    /// `wtr` may be any `fmt::Write` implementation, e.g. a `&mut String` or
+    /// the `fmt::Formatter` handed to a `fmt::Display` implementation.
+    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
+        visitor::visit(ast, Writer { wtr: wtr, printer: self })
+    }
+}
+
+#[derive(Debug)]
+struct Writer<'p, W> { // visitor state used while printing a single AST
+    printer: &'p mut Printer, // the printer that started this traversal
+    wtr: W, // destination that receives the pattern text
+}
+
+impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(()) // the visit callbacks already wrote all text to `wtr`
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
+        // Only groups and bracketed classes emit text in the pre-visit step.
+        if let Ast::Group(ref group) = *ast {
+            return self.fmt_group_pre(group);
+        } else if let Ast::Class(ast::Class::Bracketed(ref cls)) = *ast {
+            return self.fmt_class_bracketed_pre(cls);
+        }
+        Ok(())
+    }
+
+    /// Write leaf nodes, repetition operators and closing delimiters.
+    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
+        use ast::Class;
+
+        match *ast {
+            // These nodes contribute no text of their own at this point.
+            Ast::Empty(_) | Ast::Alternation(_) | Ast::Concat(_) => Ok(()),
+            Ast::Dot(_) => self.wtr.write_str("."),
+            Ast::Flags(ref flags) => self.fmt_set_flags(flags),
+            Ast::Literal(ref lit) => self.fmt_literal(lit),
+            Ast::Assertion(ref assertion) => self.fmt_assertion(assertion),
+            Ast::Repetition(ref rep) => self.fmt_repetition(rep),
+            Ast::Group(ref group) => self.fmt_group_post(group),
+            Ast::Class(ref class) => match *class {
+                Class::Perl(ref c) => self.fmt_class_perl(c),
+                Class::Unicode(ref c) => self.fmt_class_unicode(c),
+                Class::Bracketed(ref c) => self.fmt_class_bracketed_post(c),
+            },
+        }
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|") // the alternation operator
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        // Only a nested bracketed class opens with text of its own.
+        if let ast::ClassSetItem::Bracketed(ref nested) = *ast {
+            self.fmt_class_bracketed_pre(nested)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        use ast::ClassSetItem::*;
+
+        match *ast {
+            // Neither variant writes anything here.
+            Empty(_) | Union(_) => Ok(()),
+            Literal(ref lit) => self.fmt_literal(lit),
+            Range(ref range) => {
+                // Printed as `start-end`.
+                try!(self.fmt_literal(&range.start));
+                try!(self.wtr.write_str("-"));
+                self.fmt_literal(&range.end)
+            }
+            Ascii(ref cls) => self.fmt_class_ascii(cls),
+            Unicode(ref cls) => self.fmt_class_unicode(cls),
+            Perl(ref cls) => self.fmt_class_perl(cls),
+            Bracketed(ref cls) => self.fmt_class_bracketed_post(cls),
+        }
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        self.fmt_class_set_binary_op_kind(&ast.kind) // `&&`, `--` or `~~`
+    }
+}
+
+impl<'p, W: fmt::Write> Writer<'p, W> {
+    /// Write the opening of a group: `(` for an indexed capture,
+    /// `(?P<name>` for a named capture and `(?flags:` otherwise.
+    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
+        use ast::GroupKind::*;
+        match ast.kind {
+            CaptureIndex(_) => self.wtr.write_str("("),
+            CaptureName(ref capture) => {
+                try!(self.wtr.write_str("(?P<"));
+                try!(self.wtr.write_str(&capture.name));
+                self.wtr.write_str(">")
+            }
+            NonCapturing(ref flags) => {
+                try!(self.wtr.write_str("(?"));
+                try!(self.fmt_flags(flags));
+                self.wtr.write_str(":")
+            }
+        }
+    }
+
+    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
+        self.wtr.write_str(")") // every group kind closes with a plain `)`
+    }
+
+    /// Write a repetition operator, appending `?` when the repetition is not
+    /// greedy.
+    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
+        use ast::RepetitionKind::*;
+        let op = match (&ast.op.kind, ast.greedy) {
+            (&ZeroOrOne, true) => "?",
+            (&ZeroOrOne, false) => "??",
+            (&ZeroOrMore, true) => "*",
+            (&ZeroOrMore, false) => "*?",
+            (&OneOrMore, true) => "+",
+            (&OneOrMore, false) => "+?",
+            (&Range(ref range), greedy) => {
+                try!(self.fmt_repetition_range(range));
+                return if greedy { Ok(()) } else { self.wtr.write_str("?") };
+            }
+        };
+        self.wtr.write_str(op)
+    }
+
+    fn fmt_repetition_range(
+        &mut self,
+        ast: &ast::RepetitionRange,
+    ) -> fmt::Result {
+        use ast::RepetitionRange::{AtLeast, Bounded, Exactly};
+        match *ast { // `{{`/`}}` are escaped braces in the format strings
+            Exactly(n) => write!(self.wtr, "{{{}}}", n),
+            AtLeast(min) => write!(self.wtr, "{{{},}}", min),
+            Bounded(min, max) => write!(self.wtr, "{{{},{}}}", min, max),
+        }
+    }
+
+    /// Write a single literal.
+    ///
+    /// The literal is rendered in the notation recorded by its `kind`:
+    /// verbatim, backslash-escaped punctuation, octal, a fixed-width or
+    /// brace-delimited hex escape, or a named escape such as `\n`. Hex digits
+    /// are always written in upper case.
+    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
+        use ast::HexLiteralKind as Hex;
+        use ast::LiteralKind::*;
+        use ast::SpecialLiteralKind as Esc;
+
+        // Numeric escapes print the code point rather than the character.
+        let cp = ast.c as u32;
+
+        match ast.kind {
+            // Written exactly as the character itself.
+            Verbatim => self.wtr.write_char(ast.c),
+            // A backslash followed by the escaped character itself.
+            Punctuation => write!(self.wtr, r"\{}", ast.c),
+            Octal => write!(self.wtr, r"\{:o}", cp),
+            // Fixed-width hex escapes are zero padded to 2, 4 or 8 digits.
+            HexFixed(ref hex) => match *hex {
+                Hex::X => write!(self.wtr, r"\x{:02X}", cp),
+                Hex::UnicodeShort => write!(self.wtr, r"\u{:04X}", cp),
+                Hex::UnicodeLong => write!(self.wtr, r"\U{:08X}", cp),
+            },
+            // Brace-delimited hex escapes print no padding; `{{` and `}}`
+            // in the format strings yield the literal braces.
+            HexBrace(ref hex) => match *hex {
+                Hex::X => write!(self.wtr, r"\x{{{:X}}}", cp),
+                Hex::UnicodeShort => write!(self.wtr, r"\u{{{:X}}}", cp),
+                Hex::UnicodeLong => write!(self.wtr, r"\U{{{:X}}}", cp),
+            },
+            // Named single-character escapes.
+            Special(ref esc) => {
+                let text = match *esc {
+                    Esc::Bell => r"\a",
+                    Esc::FormFeed => r"\f",
+                    Esc::Tab => r"\t",
+                    Esc::LineFeed => r"\n",
+                    Esc::CarriageReturn => r"\r",
+                    Esc::VerticalTab => r"\v",
+                    Esc::Space => r"\ ",
+                };
+                self.wtr.write_str(text)
+            }
+        }
+    }
+
+    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
+        use ast::AssertionKind::*;
+        self.wtr.write_str(match ast.kind { // one fixed spelling per kind
+            StartLine => "^",
+            EndLine => "$",
+            StartText => r"\A",
+            EndText => r"\z",
+            WordBoundary => r"\b",
+            NotWordBoundary => r"\B",
+        })
+    }
+
+    /// Write a flag-setting directive: the flags wrapped in `(?` and `)`.
+    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
+        try!(self.wtr.write_str("(?"));
+        try!(self.fmt_flags(&ast.flags));
+        self.wtr.write_str(")")
+    }
+
+    /// Write every flag item in order.
+    ///
+    /// A negation is printed as `-`, a flag as its single-letter name.
+    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
+        use ast::{Flag, FlagsItemKind};
+
+        for item in &ast.items {
+            let text = match item.kind {
+                FlagsItemKind::Negation => "-",
+                FlagsItemKind::Flag(Flag::CaseInsensitive) => "i",
+                FlagsItemKind::Flag(Flag::MultiLine) => "m",
+                FlagsItemKind::Flag(Flag::DotMatchesNewLine) => "s",
+                FlagsItemKind::Flag(Flag::SwapGreed) => "U",
+                FlagsItemKind::Flag(Flag::Unicode) => "u",
+                FlagsItemKind::Flag(Flag::IgnoreWhitespace) => "x",
+            };
+            try!(self.wtr.write_str(text));
+        }
+        Ok(())
+    }
+
+    /// Write the opening of a bracketed class.
+    ///
+    /// A negated class opens with `[^`, any other class with `[`.
+    fn fmt_class_bracketed_pre(
+        &mut self,
+        ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        let open = if ast.negated { "[^" } else { "[" };
+        self.wtr.write_str(open)
+    }
+
+    fn fmt_class_bracketed_post(
+        &mut self,
+        _ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        self.wtr.write_str("]") // negated or not, the class closes with `]`
+    }
+
+    fn fmt_class_set_binary_op_kind(
+        &mut self,
+        ast: &ast::ClassSetBinaryOpKind,
+    ) -> fmt::Result {
+        use ast::ClassSetBinaryOpKind::*;
+        self.wtr.write_str(match *ast { // two-character set operators
+            Intersection => "&&",
+            Difference => "--",
+            SymmetricDifference => "~~",
+        })
+    }
+
+    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
+        use ast::ClassPerlKind::*;
+        self.wtr.write_str(match (&ast.kind, ast.negated) {
+            (&Digit, false) => r"\d",
+            (&Digit, true) => r"\D", // upper case spells the negated class
+            (&Space, false) => r"\s",
+            (&Space, true) => r"\S",
+            (&Word, false) => r"\w",
+            (&Word, true) => r"\W",
+        })
+    }
+
+    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
+        use ast::ClassAsciiKind::*;
+        self.wtr.write_str(match (&ast.kind, ast.negated) {
+            (&Alnum, false) => "[:alnum:]",
+            (&Alnum, true) => "[:^alnum:]", // `^` marks the negated form
+            (&Alpha, false) => "[:alpha:]",
+            (&Alpha, true) => "[:^alpha:]",
+            (&Ascii, false) => "[:ascii:]",
+            (&Ascii, true) => "[:^ascii:]",
+            (&Blank, false) => "[:blank:]",
+            (&Blank, true) => "[:^blank:]",
+            (&Cntrl, false) => "[:cntrl:]",
+            (&Cntrl, true) => "[:^cntrl:]",
+            (&Digit, false) => "[:digit:]",
+            (&Digit, true) => "[:^digit:]",
+            (&Graph, false) => "[:graph:]",
+            (&Graph, true) => "[:^graph:]",
+            (&Lower, false) => "[:lower:]",
+            (&Lower, true) => "[:^lower:]",
+            (&Print, false) => "[:print:]",
+            (&Print, true) => "[:^print:]",
+            (&Punct, false) => "[:punct:]",
+            (&Punct, true) => "[:^punct:]",
+            (&Space, false) => "[:space:]",
+            (&Space, true) => "[:^space:]",
+            (&Upper, false) => "[:upper:]",
+            (&Upper, true) => "[:^upper:]",
+            (&Word, false) => "[:word:]",
+            (&Word, true) => "[:^word:]",
+            (&Xdigit, false) => "[:xdigit:]",
+            (&Xdigit, true) => "[:^xdigit:]",
+        })
+    }
+
+    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
+        use ast::ClassUnicodeKind::*;
+        use ast::ClassUnicodeOpKind::*;
+
+        if ast.negated { // negated classes are written with an uppercase `P`
+            try!(self.wtr.write_str(r"\P"));
+        } else {
+            try!(self.wtr.write_str(r"\p"));
+        }
+        match ast.kind { // `{{` and `}}` in the formats below are literal braces
+            OneLetter(c) => self.wtr.write_char(c), // one-letter form: no braces
+            Named(ref x) => write!(self.wtr, "{{{}}}", x),
+            NamedValue { op: Equal, ref name, ref value } => {
+                write!(self.wtr, "{{{}={}}}", name, value)
+            }
+            NamedValue { op: Colon, ref name, ref value } => {
+                write!(self.wtr, "{{{}:{}}}", name, value)
+            }
+            NamedValue { op: NotEqual, ref name, ref value } => {
+                write!(self.wtr, "{{{}!={}}}", name, value)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::parse::ParserBuilder;
+    use super::Printer;
+
+    fn roundtrip(given: &str) { // round trip using an untouched parser builder
+        roundtrip_with(|b| b, given);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str)
+        where F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder); // let the caller adjust parser options first
+        let ast = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&ast, &mut dst).unwrap();
+        assert_eq!(given, dst); // printing must reproduce the input exactly
+    }
+
+    #[test]
+    fn scratch() { // NOTE(review): same case as `print_dot`; looks like leftover scratch code
+        roundtrip(".");
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a");
+        roundtrip(r"\[");
+        roundtrip_with(|b| b.octal(true), r"\141");
+        roundtrip(r"\x61");
+        roundtrip(r"\x7F");
+        roundtrip(r"\u0061");
+        roundtrip(r"\U00000061");
+        roundtrip(r"\x{61}");
+        roundtrip(r"\x{7F}");
+        roundtrip(r"\u{61}");
+        roundtrip(r"\U{61}");
+
+        roundtrip(r"\a");
+        roundtrip(r"\f");
+        roundtrip(r"\t");
+        roundtrip(r"\n");
+        roundtrip(r"\r");
+        roundtrip(r"\v");
+        roundtrip(r"(?x)\ ");
+    }
+
+    #[test]
+    fn print_dot() {
+        roundtrip(".");
+    }
+
+    #[test]
+    fn print_concat() {
+        roundtrip("ab");
+        roundtrip("abcde");
+        roundtrip("a(bcd)ef");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("a|b");
+        roundtrip("a|b|c|d|e");
+        roundtrip("|a|b|c|d|e");
+        roundtrip("|a|b|c|d|e|");
+        roundtrip("a(b|c|d)|e|f");
+    }
+
+    #[test]
+    fn print_assertion() {
+        roundtrip(r"^");
+        roundtrip(r"$");
+        roundtrip(r"\A");
+        roundtrip(r"\z");
+        roundtrip(r"\b");
+        roundtrip(r"\B");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?");
+        roundtrip("a??");
+        roundtrip("a*");
+        roundtrip("a*?");
+        roundtrip("a+");
+        roundtrip("a+?");
+        roundtrip("a{5}");
+        roundtrip("a{5}?");
+        roundtrip("a{5,}");
+        roundtrip("a{5,}?");
+        roundtrip("a{5,10}");
+        roundtrip("a{5,10}?");
+    }
+
+    #[test]
+    fn print_flags() {
+        roundtrip("(?i)");
+        roundtrip("(?-i)");
+        roundtrip("(?s-i)");
+        roundtrip("(?-si)");
+        roundtrip("(?siUmux)");
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("(?i:a)");
+        roundtrip("(?P<foo>a)");
+        roundtrip("(a)");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[abc]");
+        roundtrip(r"[a-z]");
+        roundtrip(r"[^a-z]");
+        roundtrip(r"[a-z0-9]");
+        roundtrip(r"[-a-z0-9]");
+        roundtrip(r"[-a-z0-9]"); // NOTE(review): repeats the line above; maybe `[a-z0-9-]` was meant
+        roundtrip(r"[a-z0-9---]");
+        roundtrip(r"[a-z&&m-n]");
+        roundtrip(r"[[a-z&&m-n]]");
+        roundtrip(r"[a-z--m-n]");
+        roundtrip(r"[a-z~~m-n]");
+        roundtrip(r"[a-z[0-9]]");
+        roundtrip(r"[a-z[^0-9]]");
+
+        roundtrip(r"\d");
+        roundtrip(r"\D");
+        roundtrip(r"\s");
+        roundtrip(r"\S");
+        roundtrip(r"\w");
+        roundtrip(r"\W");
+
+        roundtrip(r"[[:alnum:]]");
+        roundtrip(r"[[:^alnum:]]");
+        roundtrip(r"[[:alpha:]]");
+        roundtrip(r"[[:^alpha:]]");
+        roundtrip(r"[[:ascii:]]");
+        roundtrip(r"[[:^ascii:]]");
+        roundtrip(r"[[:blank:]]");
+        roundtrip(r"[[:^blank:]]");
+        roundtrip(r"[[:cntrl:]]");
+        roundtrip(r"[[:^cntrl:]]");
+        roundtrip(r"[[:digit:]]");
+        roundtrip(r"[[:^digit:]]");
+        roundtrip(r"[[:graph:]]");
+        roundtrip(r"[[:^graph:]]");
+        roundtrip(r"[[:lower:]]");
+        roundtrip(r"[[:^lower:]]");
+        roundtrip(r"[[:print:]]");
+        roundtrip(r"[[:^print:]]");
+        roundtrip(r"[[:punct:]]");
+        roundtrip(r"[[:^punct:]]");
+        roundtrip(r"[[:space:]]");
+        roundtrip(r"[[:^space:]]");
+        roundtrip(r"[[:upper:]]");
+        roundtrip(r"[[:^upper:]]");
+        roundtrip(r"[[:word:]]");
+        roundtrip(r"[[:^word:]]");
+        roundtrip(r"[[:xdigit:]]");
+        roundtrip(r"[[:^xdigit:]]");
+
+        roundtrip(r"\pL");
+        roundtrip(r"\PL");
+        roundtrip(r"\p{L}");
+        roundtrip(r"\P{L}");
+        roundtrip(r"\p{X=Y}");
+        roundtrip(r"\P{X=Y}");
+        roundtrip(r"\p{X:Y}");
+        roundtrip(r"\P{X:Y}");
+        roundtrip(r"\p{X!=Y}");
+        roundtrip(r"\P{X!=Y}");
+    }
+}
diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs
new file mode 100644
index 0000000000..268ac45f1b
--- /dev/null
+++ b/regex-syntax/src/ast/visitor.rs
@@ -0,0 +1,557 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::fmt;
+
+use ast::{self, Ast};
+
+/// A trait for visiting an abstract syntax tree (AST) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on an abstract syntax tree without necessarily using recursion.
+/// In particular, this permits callers to do case analysis with constant stack
+/// usage, which can be important since the size of an abstract syntax tree
+/// may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Note that the abstract syntax tree for a regular expression is quite
+/// complex. Unless you specifically need it, you might be able to use the
+/// much simpler
+/// [high-level intermediate representation](../hir/struct.Hir.html)
+/// and its
+/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
+/// instead.
+pub trait Visitor {
+    /// The result of visiting an AST.
+    type Output;
+    /// An error that visiting an AST might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the AST or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the AST.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Ast` before descending into child `Ast`
+    /// nodes.
+    fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Ast` after descending into all of its
+    /// child `Ast` nodes.
+    fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an
+    /// [`Alternation`](struct.Alternation.html).
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// before descending into child nodes.
+    fn visit_class_set_item_pre(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// after descending into child nodes.
+    fn visit_class_set_item_post(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// before descending into child nodes.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// after descending into child nodes.
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between the left hand and right hand child nodes
+    /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Ast` while calling the
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this method is when one wants to perform case
+/// analysis over an `Ast` without using a stack size proportional to the depth
+/// of the `Ast`. Namely, this method will instead use constant stack size, but
+/// will use heap space proportional to the size of the `Ast`. This may be
+/// desirable in cases where the size of `Ast` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
+pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(ast, visitor) // fresh, empty stacks for every call
+}
+
+/// HeapVisitor visits every item in an `Ast` depth first using constant stack
+/// size and a heap size proportional to the size of the `Ast`.
+struct HeapVisitor<'a> {
+    /// A stack of `Ast` nodes and their frames. This is roughly analogous to
+    /// the call stack used in a typical recursive visitor.
+    stack: Vec<(&'a Ast, Frame<'a>)>,
+    /// Similar to the `Ast` stack above, but is used only for character
+    /// classes. In particular, character classes embed their own mini
+    /// recursive syntax.
+    stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Ast`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a ast::Repetition), // its only child is `rep.ast`
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a ast::Group), // its only child is `group.ast`
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// a character class.
+enum ClassFrame<'a> {
+    /// The stack frame used while visiting every child node of a union of
+    /// character class items.
+    Union {
+        /// The child node we are currently visiting.
+        head: &'a ast::ClassSetItem,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [ast::ClassSetItem],
+    },
+    /// The stack frame used while visiting a binary class operation.
+    Binary {
+        op: &'a ast::ClassSetBinaryOp,
+    },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// left hand child node.
+    BinaryLHS {
+        op: &'a ast::ClassSetBinaryOp,
+        lhs: &'a ast::ClassSet,
+        rhs: &'a ast::ClassSet, // kept so `pop_class` can build `BinaryRHS`
+    },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// right hand child node.
+    BinaryRHS {
+        op: &'a ast::ClassSetBinaryOp,
+        rhs: &'a ast::ClassSet,
+    },
+}
+
+/// A representation of the inductive step when performing structural induction
+/// over a character class.
+///
+/// Note that there is no analogous explicit type for the inductive step for
+/// `Ast` nodes because the inductive step is just an `Ast`. For character
+/// classes, the inductive step can produce one of two possible child nodes:
+/// an item or a binary operation. (An item cannot be a binary operation
+/// because that would imply binary operations can be unioned in the concrete
+/// syntax, which is not possible.)
+enum ClassInduct<'a> {
+    Item(&'a ast::ClassSetItem), // reported via `visit_class_set_item_{pre,post}`
+    BinaryOp(&'a ast::ClassSetBinaryOp), // via `visit_class_set_binary_op_*`
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![], stack_class: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut ast: &'a Ast,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear(); // both stacks start empty for every traversal
+        self.stack_class.clear();
+
+        visitor.start();
+        loop {
+            try!(visitor.visit_pre(ast));
+            if let Some(x) = try!(self.induct(ast, &mut visitor)) {
+                let child = x.child();
+                self.stack.push((ast, x));
+                ast = child;
+                continue;
+            }
+            // No induction means we have a base case, so we can post visit
+            // it now.
+            try!(visitor.visit_post(ast));
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack.pop() {
+                    None => return visitor.finish(),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a concat/alternate, then we might have additional
+                // inductive steps to process.
+                if let Some(x) = self.pop(frame) {
+                    if let Frame::Alternation {..} = x {
+                        try!(visitor.visit_alternation_in());
+                    }
+                    ast = x.child();
+                    self.stack.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this AST, so we can post visit it now.
+                try!(visitor.visit_post(post_ast));
+            }
+        }
+    }
+
+    /// Build a stack frame for the given AST if one is needed (which occurs if
+    /// and only if there are child nodes in the AST). Otherwise, return None.
+    ///
+    /// If this visits a class, then the underlying visitor implementation may
+    /// return an error which will be passed on here.
+    fn induct<V: Visitor>(
+        &mut self,
+        ast: &'a Ast,
+        visitor: &mut V,
+    ) -> Result<Option<Frame<'a>>, V::Err> {
+        Ok(match *ast {
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                try!(self.visit_class(x, visitor)); // walked fully on `stack_class`
+                None
+            }
+            Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
+            Ast::Group(ref x) => Some(Frame::Group(x)),
+            Ast::Concat(ref x) if x.asts.is_empty() => None,
+            Ast::Concat(ref x) => {
+                Some(Frame::Concat {
+                    head: &x.asts[0],
+                    tail: &x.asts[1..],
+                })
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => None,
+            Ast::Alternation(ref x) => {
+                Some(Frame::Alternation {
+                    head: &x.asts[0],
+                    tail: &x.asts[1..],
+                })
+            }
+            _ => None,
+        })
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None, // single child, nothing left to visit
+            Frame::Group(_) => None, // single child, nothing left to visit
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+
+    fn visit_class<V: Visitor>(
+        &mut self,
+        ast: &'a ast::ClassBracketed,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        let mut ast = ClassInduct::from_bracketed(ast); // start at the inner set
+        loop {
+            try!(self.visit_class_pre(&ast, visitor));
+            if let Some(x) = self.induct_class(&ast) {
+                let child = x.child();
+                self.stack_class.push((ast, x));
+                ast = child;
+                continue;
+            }
+            try!(self.visit_class_post(&ast, visitor));
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack_class.pop() {
+                    None => return Ok(()),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a union or a binary op, then we might have
+                // additional inductive steps to process.
+                if let Some(x) = self.pop_class(frame) {
+                    if let ClassFrame::BinaryRHS { ref op, .. } = x {
+                        try!(visitor.visit_class_set_binary_op_in(op));
+                    }
+                    ast = x.child();
+                    self.stack_class.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this class node, so we can post visit it now.
+                try!(self.visit_class_post(&post_ast, visitor));
+            }
+        }
+    }
+
+    /// Call the pre-visit `Visitor` method matching the given inductive step.
+    fn visit_class_pre<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                try!(visitor.visit_class_set_item_pre(item));
+            }
+            ClassInduct::BinaryOp(op) => {
+                try!(visitor.visit_class_set_binary_op_pre(op));
+            }
+        }
+        Ok(())
+    }
+
+    /// Call the post-visit `Visitor` method matching the given inductive step.
+    fn visit_class_post<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                try!(visitor.visit_class_set_item_post(item));
+            }
+            ClassInduct::BinaryOp(op) => {
+                try!(visitor.visit_class_set_binary_op_post(op));
+            }
+        }
+        Ok(())
+    }
+
+    /// Build a stack frame for the given class node if one is needed (which
+    /// occurs if and only if there are child nodes). Otherwise, return None.
+    fn induct_class(
+        &self,
+        ast: &ClassInduct<'a>,
+    ) -> Option<ClassFrame<'a>> {
+        match *ast {
+            ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
+                match x.kind {
+                    ast::ClassSet::Item(ref item) => {
+                        Some(ClassFrame::Union {
+                            head: item,
+                            tail: &[], // the nested set is a lone item
+                        })
+                    }
+                    ast::ClassSet::BinaryOp(ref op) => {
+                        Some(ClassFrame::Binary { op: op })
+                    }
+                }
+            }
+            ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
+                if x.items.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &x.items[0],
+                        tail: &x.items[1..],
+                    })
+                }
+            }
+            ClassInduct::BinaryOp(op) => {
+                Some(ClassFrame::BinaryLHS {
+                    op: op,
+                    lhs: &op.lhs,
+                    rhs: &op.rhs,
+                })
+            }
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
+        match induct {
+            ClassFrame::Union { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            ClassFrame::Binary {..} => None, // its only child, the op, is done
+            ClassFrame::BinaryLHS { op, rhs, .. } => {
+                Some(ClassFrame::BinaryRHS {
+                    op: op,
+                    rhs: rhs,
+                })
+            }
+            ClassFrame::BinaryRHS {..} => None, // both operands were visited
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Return the child AST node that this frame says to visit next. The
+    /// frame itself is only read, never advanced.
+    fn child(&self) -> &'a Ast {
+        match *self {
+            Frame::Repetition(rep) => &rep.ast,
+            Frame::Group(group) => &group.ast,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}
+
+impl<'a> ClassFrame<'a> {
+    /// Return the child class node that this frame says to visit next. The
+    /// frame itself is only read, never advanced.
+    fn child(&self) -> ClassInduct<'a> {
+        match *self {
+            ClassFrame::Union { head, .. } => ClassInduct::Item(head),
+            ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
+            ClassFrame::BinaryLHS { ref lhs, .. } => {
+                ClassInduct::from_set(lhs)
+            }
+            ClassFrame::BinaryRHS { ref rhs, .. } => {
+                ClassInduct::from_set(rhs)
+            }
+        }
+    }
+}
+
+impl<'a> ClassInduct<'a> {
+    fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
+        ClassInduct::from_set(&ast.kind) // only the `kind` field is read
+    }
+
+    fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
+        match *ast { // each `ClassSet` variant maps to the same-named step
+            ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
+            ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
+        }
+    }
+}
+
+impl<'a> fmt::Debug for ClassFrame<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self { // variant name only; payloads are not printed
+            ClassFrame::Union{..} => "Union",
+            ClassFrame::Binary{..} => "Binary",
+            ClassFrame::BinaryLHS{..} => "BinaryLHS",
+            ClassFrame::BinaryRHS{..} => "BinaryRHS",
+        };
+        write!(f, "{}", x)
+    }
+}
+
+impl<'a> fmt::Debug for ClassInduct<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self { // prints the step and node kind, not the contents
+            ClassInduct::Item(it) => {
+                match *it {
+                    ast::ClassSetItem::Empty(_) => "Item(Empty)",
+                    ast::ClassSetItem::Literal(_) => "Item(Literal)",
+                    ast::ClassSetItem::Range(_) => "Item(Range)",
+                    ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
+                    ast::ClassSetItem::Perl(_) => "Item(Perl)",
+                    ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
+                    ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
+                    ast::ClassSetItem::Union(_) => "Item(Union)",
+                }
+            }
+            ClassInduct::BinaryOp(it) => {
+                match it.kind {
+                    ast::ClassSetBinaryOpKind::Intersection => {
+                        "BinaryOp(Intersection)"
+                    }
+                    ast::ClassSetBinaryOpKind::Difference => {
+                        "BinaryOp(Difference)"
+                    }
+                    ast::ClassSetBinaryOpKind::SymmetricDifference => {
+                        "BinaryOp(SymmetricDifference)"
+                    }
+                }
+            }
+        };
+        write!(f, "{}", x)
+    }
+}
diff --git a/regex-syntax/src/either.rs b/regex-syntax/src/either.rs
new file mode 100644
index 0000000000..7ae41e4ced
--- /dev/null
+++ b/regex-syntax/src/either.rs
@@ -0,0 +1,8 @@
+/// A simple binary sum type: a value is either a `Left` or a `Right`.
+///
+/// This is occasionally useful in an ad hoc fashion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Either<Left, Right> {
+    Left(Left),
+    Right(Right),
+}
diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs
new file mode 100644
index 0000000000..53f1231d8e
--- /dev/null
+++ b/regex-syntax/src/error.rs
@@ -0,0 +1,278 @@
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+
+use ast;
+use hir;
+
+/// A type alias for dealing with errors returned by this crate.
+pub type Result<T> = result::Result<T, Error>;
+
+/// This error type encompasses any error that can be returned by this crate.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Error {
+    /// An error that occurred while translating concrete syntax into abstract
+    /// syntax (AST).
+    Parse(ast::Error),
+    /// An error that occurred while translating abstract syntax into a high
+    /// level intermediate representation (HIR).
+    Translate(hir::Error),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive, // `Display`/`description` treat this variant as unreachable
+}
+
+impl From<ast::Error> for Error {
+    fn from(err: ast::Error) -> Error {
+        Error::Parse(err) // AST (parse) errors map to the `Parse` variant
+    }
+}
+
+impl From<hir::Error> for Error {
+    fn from(err: hir::Error) -> Error {
+        Error::Translate(err) // HIR errors map to the `Translate` variant
+    }
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        match *self {
+            Error::Parse(ref x) => x.description(),
+            Error::Translate(ref x) => x.description(),
+            _ => unreachable!(), // only `__Nonexhaustive` can reach this arm
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Parse(ref x) => x.fmt(f),
+            Error::Translate(ref x) => x.fmt(f),
+            _ => unreachable!(), // only `__Nonexhaustive` can reach this arm
+        }
+    }
+}
+
+/// A helper type for formatting nice error messages.
+///
+/// This type is responsible for reporting regex parse errors in a nice human
+/// readable format. Most of its complexity is from interspersing notational
+/// markers pointing out the position where an error occurred.
+#[derive(Debug)]
+pub struct Formatter<'e, E: 'e> {
+    /// The original regex pattern in which the error occurred.
+    pattern: &'e str,
+    /// The error kind. It must impl fmt::Display.
+    err: &'e E,
+    /// The primary span of the error.
+    span: &'e ast::Span,
+    /// An auxiliary and optional span, in case the error needs to point to
+    /// two locations (e.g., when reporting a duplicate capture group name).
+    aux_span: Option<&'e ast::Span>, // the `hir::Error` conversion sets `None`
+}
+
+impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
+    fn from(err: &'e ast::Error) -> Self {
+        Formatter { // every field borrows from `err`; nothing is copied
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: err.auxiliary_span(),
+        }
+    }
+}
+
+impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
+    fn from(err: &'e hir::Error) -> Self {
+        Formatter {
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: None, // no auxiliary span is taken from HIR errors
+        }
+    }
+}
+
+impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let spans = Spans::from_formatter(self); // built once, used by both arms
+        if self.pattern.contains('\n') {
+            let divider = repeat_char('~', 79);
+
+            try!(writeln!(f, "regex parse error:"));
+            try!(writeln!(f, "{}", divider));
+            let notated = spans.notate();
+            try!(write!(f, "{}", notated));
+            try!(writeln!(f, "{}", divider));
+            // If we have error spans that cover multiple lines, then we just
+            // note the line numbers.
+            if !spans.multi_line.is_empty() {
+                let mut notes = vec![];
+                for span in &spans.multi_line {
+                    notes.push(format!(
+                        "on line {} (column {}) through line {} (column {})",
+                        span.start.line, span.start.column,
+                        span.end.line, span.end.column - 1));
+                }
+                try!(writeln!(f, "{}", notes.join("\n")));
+            }
+            try!(write!(f, "error: {}", self.err));
+        } else {
+            try!(writeln!(f, "regex parse error:"));
+            let notated = spans.notate(); // single-line pattern: no dividers
+            try!(write!(f, "{}", notated));
+            try!(write!(f, "error: {}", self.err));
+        }
+        Ok(())
+    }
+}
+
+/// This type represents an arbitrary number of error spans in a way that makes
+/// it convenient to notate the regex pattern. ("Notate" means "point out
+/// exactly where the error occurred in the regex pattern.")
+///
+/// Technically, we can only ever have two spans given our current error
+/// structure. However, after toiling with a specific algorithm for handling
+/// two spans, it became obvious that an algorithm to handle an arbitrary
+/// number of spans was actually much simpler.
+struct Spans<'p> {
+    /// The original regex pattern string.
+    pattern: &'p str,
+    /// The total width that should be used for line numbers. The width is
+    /// used for left padding the line numbers for alignment.
+    ///
+    /// A value of `0` means line numbers should not be displayed. That is,
+    /// the pattern is itself only one line.
+    line_number_width: usize,
+    /// All error spans that occur on a single line. This sequence always has
+    /// length equivalent to the number of lines in `pattern`, where the index
+    /// of the sequence represents a line number, starting at `0`. The spans
+    /// in each line are sorted in ascending order.
+    by_line: Vec<Vec<ast::Span>>,
+    /// All error spans that cover more than one line. That is, the start
+    /// and end position of the span have different line numbers. The spans are
+    /// sorted in ascending order.
+    multi_line: Vec<ast::Span>,
+}
+
+impl<'p> Spans<'p> {
+    /// Build a sequence of spans from a formatter.
+    fn from_formatter<'e, E: fmt::Display>(
+        fmter: &'p Formatter<'e, E>,
+    ) -> Spans<'p> {
+        // `str::lines` yields no item for the (empty) line after a trailing
+        // `\n`, nor for an empty pattern, yet a span may point there. Count
+        // that line too so `add` never indexes past the end of `by_line`.
+        let tail = fmter.pattern.is_empty() || fmter.pattern.ends_with('\n');
+        let line_count = fmter.pattern.lines().count() + tail as usize;
+        let line_number_width =
+            if line_count <= 1 { 0 } else { line_count.to_string().len() };
+        let mut spans = Spans {
+            pattern: &fmter.pattern,
+            line_number_width: line_number_width,
+            by_line: vec![vec![]; line_count],
+            multi_line: vec![],
+        };
+        spans.add(fmter.span.clone());
+        if let Some(span) = fmter.aux_span {
+            spans.add(span.clone());
+        }
+        spans
+    }
+
+    /// Add the given span to this sequence, putting it in the right place.
+    fn add(&mut self, span: ast::Span) {
+        // This is grossly inefficient since we sort after each add, but right
+        // now, we only ever add two spans at most.
+        if span.is_one_line() {
+            let i = span.start.line - 1; // because lines are 1-indexed
+            self.by_line[i].push(span);
+            self.by_line[i].sort();
+        } else {
+            self.multi_line.push(span);
+            self.multi_line.sort();
+        }
+    }
+
+    /// Notate the pattern string with carets (`^`) pointing at each span
+    /// location. This only applies to spans that occur within a single line.
+    fn notate(&self) -> String {
+        let mut notated = String::new();
+        for (i, line) in self.pattern.lines().enumerate() {
+            if self.line_number_width > 0 {
+                notated.push_str(&self.left_pad_line_number(i + 1));
+                notated.push_str(": ");
+            } else {
+                notated.push_str("    ");
+            }
+            notated.push_str(line);
+            notated.push('\n');
+            if let Some(notes) = self.notate_line(i) {
+                notated.push_str(&notes);
+                notated.push('\n');
+            }
+        }
+        notated
+    }
+
+    /// Return notes for the line indexed at `i` (zero-based). If there are no
+    /// spans for the given line, then `None` is returned. Otherwise, an
+    /// appropriately space padded string with correctly positioned `^` is
+    /// returned, accounting for line numbers.
+    fn notate_line(&self, i: usize) -> Option<String> {
+        let spans = &self.by_line[i];
+        if spans.is_empty() {
+            return None;
+        }
+        let mut notes = String::new();
+        for _ in 0..self.line_number_padding() {
+            notes.push(' ');
+        }
+        let mut pos = 0;
+        for span in spans {
+            for _ in pos..(span.start.column - 1) {
+                notes.push(' ');
+                pos += 1;
+            }
+            let note_len = span.end.column.saturating_sub(span.start.column);
+            for _ in 0..cmp::max(1, note_len) {
+                notes.push('^');
+                pos += 1;
+            }
+        }
+        Some(notes)
+    }
+
+    /// Left pad the given line number with spaces such that it is aligned with
+    /// other line numbers.
+    fn left_pad_line_number(&self, n: usize) -> String {
+        let n = n.to_string();
+        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
+        let mut result = repeat_char(' ', pad);
+        result.push_str(&n);
+        result
+    }
+
+    /// Return the line number padding beginning at the start of each line of
+    /// the pattern.
+    ///
+    /// If the pattern is only one line, then this returns a fixed padding
+    /// for visual indentation.
+    fn line_number_padding(&self) -> usize {
+        if self.line_number_width == 0 {
+            4
+        } else {
+            2 + self.line_number_width
+        }
+    }
+}
+
+fn repeat_char(c: char, count: usize) -> String {
+    ::std::iter::repeat(c).take(count).collect() // `count` copies of `c`
+}
diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs
new file mode 100644
index 0000000000..a7e70ef596
--- /dev/null
+++ b/regex-syntax/src/hir/interval.rs
@@ -0,0 +1,490 @@
+use std::char;
+use std::cmp;
+use std::fmt::Debug;
+use std::slice;
+use std::u8;
+
+// This module contains an *internal* implementation of interval sets.
+//
+// The primary invariant that interval sets guard is canonical ordering. That
+// is, every interval set contains an ordered sequence of intervals where
+// no two intervals are overlapping or adjacent. While this invariant is
+// occasionally broken within the implementation, it should be impossible for
+// callers to observe it.
+//
+// Since case folding (as implemented below) breaks that invariant, we roll
+// that into this API even though it is a little out of place in an otherwise
+// generic interval set.
+//
+// Some of the implementation complexity here is a result of me wanting to
+// preserve the sequential representation without using additional memory.
+// In many cases, we do use linear extra memory, but it is at most 2x and it
+// is amortized. If we relaxed the memory requirements, this implementation
+// could become much simpler. The extra memory is honestly probably OK, but
+// character classes (especially of the Unicode variety) can become quite
+// large, and it would be nice to keep regex compilation snappy even in debug
+// builds. (In the past, I have been careless with this area of code and it has
+// caused slow regex compilations in debug mode, so this isn't entirely
+// unwarranted.)
+//
+// Tests on this are relegated to the public API of HIR in src/hir.rs.
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct IntervalSet<I> {
+    ranges: Vec<I>, // canonical: sorted, non-overlapping, non-adjacent
+}
+
+impl<I: Interval> IntervalSet<I> {
+    /// Create a new set from a sequence of intervals. Each interval is
+    /// specified as a pair of bounds, where both bounds are inclusive.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<T: IntoIterator<Item=I>>(intervals: T) -> IntervalSet<I> {
+        let mut set = IntervalSet { ranges: intervals.into_iter().collect() };
+        set.canonicalize();
+        set
+    }
+
+    /// Add a new interval to this set.
+    pub fn push(&mut self, interval: I) {
+        // TODO: This could be faster. e.g., Push the interval such that
+        // it preserves canonicalization.
+        self.ranges.push(interval);
+        self.canonicalize();
+    }
+
+    /// Return an iterator over all intervals in this set.
+    ///
+    /// The iterator yields intervals in ascending order.
+    pub fn iter(&self) -> IntervalSetIter<I> {
+        IntervalSetIter(self.ranges.iter())
+    }
+
+    /// Return an immutable slice of intervals in this set.
+    ///
+    /// The sequence returned is in canonical ordering.
+    pub fn intervals(&self) -> &[I] {
+        &self.ranges
+    }
+
+    /// Expand this interval set such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        let len = self.ranges.len();
+        for i in 0..len {
+            let range = self.ranges[i];
+            range.case_fold_simple(&mut self.ranges);
+        }
+        self.canonicalize();
+    }
+
+    /// Union this set with the given set, in place.
+    pub fn union(&mut self, other: &IntervalSet<I>) {
+        // This could almost certainly be done more efficiently.
+        self.ranges.extend(&other.ranges);
+        self.canonicalize();
+    }
+
+    /// Intersect this set with the given set, in place.
+    pub fn intersect(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() {
+            return;
+        }
+        if other.ranges.is_empty() {
+            self.ranges.clear();
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the intersection to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        let mut ita = (0..drain_end).into_iter();
+        let mut itb = (0..other.ranges.len()).into_iter();
+        let mut a = ita.next().unwrap();
+        let mut b = itb.next().unwrap();
+        loop {
+            if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
+                self.ranges.push(ab);
+            }
+            let (it, aorb) =
+                if self.ranges[a].upper() < other.ranges[b].upper() {
+                    (&mut ita, &mut a)
+                } else {
+                    (&mut itb, &mut b)
+                };
+            match it.next() {
+                Some(v) => *aorb = v,
+                None => break,
+            }
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Subtract the given set from this set, in place.
+    pub fn difference(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() || other.ranges.is_empty() {
+            return;
+        }
+
+        // This algorithm is (to me) surprisingly complex. A search of the
+        // interwebs indicates that this is a potentially interesting problem.
+        // Folks seem to suggest interval or segment trees, but I'd like to
+        // avoid the overhead (both runtime and conceptual) of that.
+        //
+        // The following is basically my Shitty First Draft. Therefore, in
+        // order to grok it, you probably need to read each line carefully.
+        // Simplifications are most welcome!
+        //
+        // Remember, we can assume the canonical format invariant here, which
+        // says that all ranges are sorted, not overlapping and not adjacent in
+        // each class.
+        let drain_end = self.ranges.len();
+        let (mut a, mut b) = (0, 0);
+    'LOOP:
+        while a < drain_end && b < other.ranges.len() {
+            // Basically, the easy cases are when neither range overlaps with
+            // each other. If the `b` range is less than our current `a`
+            // range, then we can skip it and move on.
+            if other.ranges[b].upper() < self.ranges[a].lower() {
+                b += 1;
+                continue;
+            }
+            // ... similarly for the `a` range. If it's less than the smallest
+            // `b` range, then we can add it as-is.
+            if self.ranges[a].upper() < other.ranges[b].lower() {
+                let range = self.ranges[a];
+                self.ranges.push(range);
+                a += 1;
+                continue;
+            }
+            // Otherwise, we have overlapping ranges.
+            assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
+
+            // This part is tricky and was non-obvious to me without looking
+            // at explicit examples (see the tests). The trickiness stems from
+            // two things: 1) subtracting a range from another range could
+            // yield two ranges and 2) after subtracting a range, it's possible
+            // that future ranges can have an impact. The loop below advances
+            // the `b` ranges until they can't possibly impact the current
+            // range.
+            //
+            // For example, if our `a` range is `a-t` and our next four `b`
+            // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
+            // subtraction three times before moving on to the next `a` range.
+            let mut range = self.ranges[a];
+            while b < other.ranges.len()
+                && !range.is_intersection_empty(&other.ranges[b])
+            {
+                let old_range = range;
+                range = match range.difference(&other.ranges[b]) {
+                    (None, None) => {
+                        // We lost the entire range, so move on to the next
+                        // without adding this one.
+                        a += 1;
+                        continue 'LOOP;
+                    }
+                    (Some(range1), None) | (None, Some(range1)) => range1,
+                    (Some(range1), Some(range2)) => {
+                        self.ranges.push(range1);
+                        range2
+                    }
+                };
+                // It's possible that the `b` range has more to contribute
+                // here. In particular, if it is greater than the original
+                // range, then it might impact the next `a` range *and* it
+                // has impacted the current `a` range as much as possible,
+                // so we can quit. We don't bump `b` so that the next `a`
+                // range can apply it.
+                if other.ranges[b].upper() > old_range.upper() {
+                    break;
+                }
+                // Otherwise, the next `b` range might apply to the current
+                // `a` range.
+                b += 1;
+            }
+            self.ranges.push(range);
+            a += 1;
+        }
+        while a < drain_end {
+            let range = self.ranges[a];
+            self.ranges.push(range);
+            a += 1;
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Compute the symmetric difference of the two sets, in place.
+    ///
+    /// This computes the symmetric difference of two interval sets. This
+    /// removes all elements in this set that are also in the given set,
+    /// but also adds all elements from the given set that aren't in this
+    /// set. That is, the set will contain all elements in either set,
+    /// but will not contain any elements that are in both sets.
+    pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
+        // TODO(burntsushi): Fix this so that it amortizes allocation.
+        let mut intersection = self.clone();
+        intersection.intersect(other);
+        self.union(other);
+        self.difference(&intersection);
+    }
+
+    /// Negate this interval set.
+    ///
+    /// For every possible element `x`, `x` is in this set after negation if
+    /// and only if it was not in this set before negation.
+    pub fn negate(&mut self) {
+        if self.ranges.is_empty() {
+            let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
+            self.ranges.push(I::create(min, max));
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        // `increment`/`decrement` are checked, but cannot fail below: the
+        // canonical invariant keeps ranges apart and the ends are tested.
+        if self.ranges[0].lower() > I::Bound::min_value() {
+            let upper = self.ranges[0].lower().decrement();
+            self.ranges.push(I::create(I::Bound::min_value(), upper));
+        }
+        for i in 1..drain_end {
+            let lower = self.ranges[i - 1].upper().increment();
+            let upper = self.ranges[i].lower().decrement();
+            self.ranges.push(I::create(lower, upper));
+        }
+        if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
+            let lower = self.ranges[drain_end - 1].upper().increment();
+            self.ranges.push(I::create(lower, I::Bound::max_value()));
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Converts this set into a canonical ordering.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty());
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalization to
+        // the end of this range, and then drain it before we're done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range in the previously added range.
+            if self.ranges.len() > drain_end {
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            let range = self.ranges[oldi];
+            self.ranges.push(range);
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Returns true if and only if this class is in a canonical ordering.
+    fn is_canonical(&self) -> bool {
+        for pair in self.ranges.windows(2) {
+            if pair[0] >= pair[1] {
+                return false;
+            }
+            if pair[0].is_contiguous(&pair[1]) {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+/// An iterator over the intervals of an `IntervalSet`, in ascending order.
+#[derive(Debug)]
+pub struct IntervalSetIter<'a, I: 'a>(slice::Iter<'a, I>);
+
+impl<'a, I> Iterator for IntervalSetIter<'a, I> {
+    type Item = &'a I;
+
+    fn next(&mut self) -> Option<&'a I> {
+        self.0.next() // forward to the wrapped slice iterator
+    }
+}
+
+pub trait Interval:
+    Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
+{
+    type Bound: Bound;
+
+    fn lower(&self) -> Self::Bound;
+    fn upper(&self) -> Self::Bound;
+    fn set_lower(&mut self, bound: Self::Bound);
+    fn set_upper(&mut self, bound: Self::Bound);
+    fn case_fold_simple(&self, intervals: &mut Vec<Self>);
+
+    /// Create a new interval, swapping the bounds if `lower > upper`.
+    fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
+        let mut int = Self::default();
+        if lower <= upper {
+            int.set_lower(lower);
+            int.set_upper(upper);
+        } else {
+            int.set_lower(upper);
+            int.set_upper(lower);
+        }
+        int
+    }
+
+    /// Union the given overlapping or adjacent range with this range.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &Self) -> Option<Self> {
+        if !self.is_contiguous(other) {
+            return None;
+        }
+        let lower = cmp::min(self.lower(), other.lower());
+        let upper = cmp::max(self.upper(), other.upper());
+        Some(Self::create(lower, upper))
+    }
+
+    /// Intersect this range with the given range and return the result.
+    ///
+    /// If the intersection is empty, then this returns `None`.
+    fn intersect(&self, other: &Self) -> Option<Self> {
+        let lower = cmp::max(self.lower(), other.lower());
+        let upper = cmp::min(self.upper(), other.upper());
+        if lower <= upper {
+            Some(Self::create(lower, upper))
+        } else {
+            None
+        }
+    }
+
+    /// Subtract the given range from this range and return the resulting
+    /// ranges.
+    ///
+    /// If subtraction would result in an empty range, then no ranges are
+    /// returned.
+    fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
+        if self.is_subset(other) {
+            return (None, None);
+        }
+        if self.is_intersection_empty(other) {
+            return (Some(self.clone()), None);
+        }
+        let add_lower = other.lower() > self.lower();
+        let add_upper = other.upper() < self.upper();
+        // We know this because !self.is_subset(other) and the ranges have
+        // a non-empty intersection.
+        assert!(add_lower || add_upper);
+        let mut ret = (None, None);
+        if add_lower {
+            let upper = other.lower().decrement();
+            ret.0 = Some(Self::create(self.lower(), upper));
+        }
+        if add_upper {
+            let lower = other.upper().increment();
+            let range = Self::create(lower, self.upper());
+            if ret.0.is_none() {
+                ret.0 = Some(range);
+            } else {
+                ret.1 = Some(range);
+            }
+        }
+        ret
+    }
+
+    /// Compute the symmetric difference of this range and the given range.
+    /// This returns the union of the two ranges minus their intersection.
+    fn symmetric_difference(
+        &self,
+        other: &Self,
+    ) -> (Option<Self>, Option<Self>) {
+        let union = match self.union(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(union) => union,
+        };
+        let intersection = match self.intersect(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(intersection) => intersection,
+        };
+        union.difference(&intersection)
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &Self) -> bool {
+        let lower1 = self.lower().as_u32();
+        let upper1 = self.upper().as_u32();
+        let lower2 = other.lower().as_u32();
+        let upper2 = other.upper().as_u32();
+        cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
+    }
+
+    /// Returns true if and only if the intersection of this range and the
+    /// other range is empty.
+    fn is_intersection_empty(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
+    }
+
+    /// Returns true if and only if this range is a subset of the other range.
+    fn is_subset(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        (lower2 <= lower1 && lower1 <= upper2)
+        && (lower2 <= upper1 && upper1 <= upper2)
+    }
+}
+
+pub trait Bound: Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord {
+    fn min_value() -> Self;
+    fn max_value() -> Self;
+    fn as_u32(self) -> u32;
+    fn increment(self) -> Self; // next value; impls below panic at the max
+    fn decrement(self) -> Self; // previous value; impls below panic at the min
+}
+
+impl Bound for u8 { // increment/decrement panic on overflow, never wrap
+    fn min_value() -> Self { u8::MIN }
+    fn max_value() -> Self { u8::MAX }
+    fn as_u32(self) -> u32 { self as u32 }
+    fn increment(self) -> Self { self.checked_add(1).unwrap() }
+    fn decrement(self) -> Self { self.checked_sub(1).unwrap() }
+}
+
+impl Bound for char {
+    fn min_value() -> Self { '\x00' }
+    fn max_value() -> Self { '\u{10FFFF}' }
+    fn as_u32(self) -> u32 { self as u32 }
+
+    fn increment(self) -> Self {
+        match self {
+            '\u{D7FF}' => '\u{E000}', // hop over the surrogate code points
+            c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(),
+        }
+    }
+
+    fn decrement(self) -> Self {
+        match self {
+            '\u{E000}' => '\u{D7FF}', // hop back over the surrogate code points
+            c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(),
+        }
+    }
+}
+
+// Tests for interval sets are written in src/hir.rs against the public API.
diff --git a/regex-syntax/src/literals.rs b/regex-syntax/src/hir/literal/mod.rs
similarity index 81%
rename from regex-syntax/src/literals.rs
rename to regex-syntax/src/hir/literal/mod.rs
index e3de16732a..3113ec970f 100644
--- a/regex-syntax/src/literals.rs
+++ b/regex-syntax/src/hir/literal/mod.rs
@@ -1,4 +1,4 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -8,31 +8,43 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+/*!
+Provides routines for extracting literal prefixes and suffixes from an `Hir`.
+*/
+
 use std::cmp;
 use std::fmt;
 use std::iter;
 use std::mem;
 use std::ops;
 
-use {Expr, CharClass, ClassRange, ByteClass, ByteRange, Repeater};
+use hir::{self, Hir, HirKind};
+use unicode;
 
 /// A set of literal byte strings extracted from a regular expression.
 ///
-/// Every member of the set is a `Lit`, which is represented by a `Vec<u8>`.
-/// (Notably, it may contain invalid UTF-8.) Every member is said to be either
-/// *complete* or *cut*. A complete literal means that it extends until the
-/// beginning (or end) of the regular expression. In some circumstances, this
-/// can be used to indicate a match in the regular expression.
+/// Every member of the set is a `Literal`, which is represented by a
+/// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
+/// said to be either *complete* or *cut*. A complete literal means that
+/// it extends until the beginning (or end) of the regular expression. In
+/// some circumstances, this can be used to indicate a match in the regular
+/// expression.
+///
+/// A key aspect of literal extraction is knowing when to stop. It is not
+/// feasible to blindly extract all literals from a regular expression, even if
+/// there are finitely many. For example, the regular expression `[0-9]{10}`
+/// has `10^10` distinct literals. For this reason, literal extraction is
+/// bounded to some low number by default using heuristics, but the limits can
+/// be tweaked.
 ///
-/// Note that a key aspect of literal extraction is knowing when to stop. It is
-/// not feasible to blindly extract all literals from a regular expression,
-/// even if there are finitely many. For example, the regular expression
-/// `[0-9]{10}` has `10^10` distinct literals. For this reason, literal
-/// extraction is bounded to some low number by default using heuristics, but
-/// the limits can be tweaked.
+/// **WARNING**: Literal extraction uses stack space proportional to the size
+/// of the `Hir` expression. At some point, this drawback will be eliminated.
+/// To protect yourself, set a reasonable
+/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
+/// This is done for you by default.
 #[derive(Clone, Eq, PartialEq)]
 pub struct Literals {
-    lits: Vec<Lit>,
+    lits: Vec<Literal>,
     limit_size: usize,
     limit_class: usize,
 }
@@ -42,7 +54,7 @@ pub struct Literals {
 /// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
 /// and `Vec` operations are available.
 #[derive(Clone, Eq, Ord)]
-pub struct Lit {
+pub struct Literal {
     v: Vec<u8>,
     cut: bool,
 }
@@ -57,6 +69,20 @@ impl Literals {
         }
     }
 
+    /// Returns a set of literal prefixes extracted from the given `Hir`.
+    pub fn prefixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_prefixes(expr);
+        lits
+    }
+
+    /// Returns a set of literal suffixes extracted from the given `Hir`.
+    pub fn suffixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_suffixes(expr);
+        lits
+    }
+
     /// Get the approximate size limit (in bytes) of this set.
     pub fn limit_size(&self) -> usize {
         self.limit_size
@@ -95,7 +121,7 @@ impl Literals {
     }
 
     /// Returns the set of literals as a slice. Its order is unspecified.
-    pub fn literals(&self) -> &[Lit] {
+    pub fn literals(&self) -> &[Literal] {
         &self.lits
     }
 
@@ -216,7 +242,7 @@ impl Literals {
         if self.lits.is_empty() {
             return self.to_empty();
         }
-        let mut old: Vec<Lit> = self.lits.iter().cloned().collect();
+        let mut old: Vec<Literal> = self.lits.iter().cloned().collect();
         let mut new = self.to_empty();
     'OUTER:
         while let Some(mut candidate) = old.pop() {
@@ -298,7 +324,7 @@ impl Literals {
     /// Note that prefix literals extracted from `expr` are said to be complete
     /// if and only if the literal extends from the beginning of `expr` to the
     /// end of `expr`.
-    pub fn union_prefixes(&mut self, expr: &Expr) -> bool {
+    pub fn union_prefixes(&mut self, expr: &Hir) -> bool {
         let mut lits = self.to_empty();
         prefixes(expr, &mut lits);
         !lits.is_empty() && !lits.contains_empty() && self.union(lits)
@@ -313,7 +339,7 @@ impl Literals {
     /// Note that prefix literals extracted from `expr` are said to be complete
     /// if and only if the literal extends from the end of `expr` to the
     /// beginning of `expr`.
-    pub fn union_suffixes(&mut self, expr: &Expr) -> bool {
+    pub fn union_suffixes(&mut self, expr: &Hir) -> bool {
         let mut lits = self.to_empty();
         suffixes(expr, &mut lits);
         lits.reverse();
@@ -330,7 +356,7 @@ impl Literals {
             return false;
         }
         if lits.is_empty() {
-            self.lits.push(Lit::empty());
+            self.lits.push(Literal::empty());
         } else {
             self.lits.extend(lits.lits);
         }
@@ -373,7 +399,7 @@ impl Literals {
 
         let mut base = self.remove_complete();
         if base.is_empty() {
-            base = vec![Lit::empty()];
+            base = vec![Literal::empty()];
         }
         for lits_lit in lits.literals() {
             for mut self_lit in base.clone() {
@@ -402,7 +428,7 @@ impl Literals {
         }
         if self.lits.is_empty() {
             let i = cmp::min(self.limit_size, bytes.len());
-            self.lits.push(Lit::new(bytes[..i].to_owned()));
+            self.lits.push(Literal::new(bytes[..i].to_owned()));
             self.lits[0].cut = i < bytes.len();
             return !self.lits[0].is_cut();
         }
@@ -430,7 +456,7 @@ impl Literals {
     ///
     /// Returns false if adding this literal would cause the class to be too
     /// big.
-    pub fn add(&mut self, lit: Lit) -> bool {
+    pub fn add(&mut self, lit: Literal) -> bool {
         if self.num_bytes() + lit.len() > self.limit_size {
             return false;
         }
@@ -441,7 +467,7 @@ impl Literals {
     /// Extends each literal in this set with the character class given.
     ///
     /// Returns false if the character class was too big to add.
-    pub fn add_char_class(&mut self, cls: &CharClass) -> bool {
+    pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool {
         self._add_char_class(cls, false)
     }
 
@@ -449,21 +475,25 @@ impl Literals {
     /// writing the bytes of each character in reverse.
     ///
     /// Returns false if the character class was too big to add.
-    fn add_char_class_reverse(&mut self, cls: &CharClass) -> bool {
+    fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool {
         self._add_char_class(cls, true)
     }
 
-    fn _add_char_class(&mut self, cls: &CharClass, reverse: bool) -> bool {
+    fn _add_char_class(
+        &mut self,
+        cls: &hir::ClassUnicode,
+        reverse: bool,
+    ) -> bool {
         use std::char;
 
-        if self.class_exceeds_limits(cls.num_chars()) {
+        if self.class_exceeds_limits(cls_char_count(cls)) {
             return false;
         }
         let mut base = self.remove_complete();
         if base.is_empty() {
-            base = vec![Lit::empty()];
+            base = vec![Literal::empty()];
         }
-        for r in cls {
+        for r in cls.iter() {
             let (s, e) = (r.start as u32, r.end as u32 + 1);
             for c in (s..e).filter_map(char::from_u32) {
                 for mut lit in base.clone() {
@@ -482,15 +512,15 @@ impl Literals {
     /// Extends each literal in this set with the byte class given.
     ///
     /// Returns false if the byte class was too big to add.
-    pub fn add_byte_class(&mut self, cls: &ByteClass) -> bool {
-        if self.class_exceeds_limits(cls.num_bytes()) {
+    pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool {
+        if self.class_exceeds_limits(cls_byte_count(cls)) {
             return false;
         }
         let mut base = self.remove_complete();
         if base.is_empty() {
-            base = vec![Lit::empty()];
+            base = vec![Literal::empty()];
         }
-        for r in cls {
+        for r in cls.iter() {
             let (s, e) = (r.start as u32, r.end as u32 + 1);
             for b in (s..e).map(|b| b as u8) {
                 for mut lit in base.clone() {
@@ -523,7 +553,7 @@ impl Literals {
     }
 
     /// Pops all complete literals out of this set.
-    fn remove_complete(&mut self) -> Vec<Lit> {
+    fn remove_complete(&mut self) -> Vec<Literal> {
         let mut base = vec![];
         for lit in mem::replace(&mut self.lits, vec![]) {
             if lit.is_cut() {
@@ -570,73 +600,67 @@ impl Literals {
     }
 }
 
-fn prefixes(expr: &Expr, lits: &mut Literals) {
-    use Expr::*;
-    match *expr {
-        Literal { ref chars, casei: false } => {
-            let s: String = chars.iter().cloned().collect();
-            lits.cross_add(s.as_bytes());
+fn prefixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0u8; 4];
+            let i = unicode::encode_utf8(c, &mut buf).unwrap();
+            lits.cross_add(&buf[..i]);
         }
-        Literal { ref chars, casei: true } => {
-            for &c in chars {
-                let cls = CharClass::new(vec![
-                    ClassRange { start: c, end: c },
-                ]).case_fold();
-                if !lits.add_char_class(&cls) {
-                    lits.cut();
-                    return;
-                }
-            }
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
         }
-        LiteralBytes { ref bytes, casei: false } => {
-            lits.cross_add(bytes);
-        }
-        LiteralBytes { ref bytes, casei: true } => {
-            for &b in bytes {
-                let cls = ByteClass::new(vec![
-                    ByteRange { start: b, end: b },
-                ]).case_fold();
-                if !lits.add_byte_class(&cls) {
-                    lits.cut();
-                    return;
-                }
-            }
-        }
-        Class(ref cls) => {
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
             if !lits.add_char_class(cls) {
                 lits.cut();
             }
         }
-        ClassBytes(ref cls) => {
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
             if !lits.add_byte_class(cls) {
                 lits.cut();
             }
         }
-        Group { ref e, .. } => {
-            prefixes(&**e, lits);
-        }
-        Repeat { ref e, r: Repeater::ZeroOrOne, .. } => {
-            repeat_zero_or_one_literals(&**e, lits, prefixes);
-        }
-        Repeat { ref e, r: Repeater::ZeroOrMore, .. } => {
-            repeat_zero_or_more_literals(&**e, lits, prefixes);
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            prefixes(&**hir, lits);
         }
-        Repeat { ref e, r: Repeater::OneOrMore, .. } => {
-            repeat_one_or_more_literals(&**e, lits, prefixes);
-        }
-        Repeat { ref e, r: Repeater::Range { min, max }, greedy } => {
-            repeat_range_literals(&**e, min, max, greedy, lits, prefixes);
+        HirKind::Repetition(ref x) => {
+            match x.kind {
+                hir::RepetitionKind::ZeroOrOne => {
+                    repeat_zero_or_one_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::ZeroOrMore => {
+                    repeat_zero_or_more_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::OneOrMore => {
+                    repeat_one_or_more_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::Range(ref rng) => {
+                    let (min, max) = match *rng {
+                        hir::RepetitionRange::Exactly(m) => {
+                            (m, Some(m))
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            (m, None)
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            (m, Some(n))
+                        }
+                    };
+                    repeat_range_literals(
+                        &x.hir, min, max, x.greedy, lits, prefixes)
+                }
+            }
         }
-        Concat(ref es) if es.is_empty() => {}
-        Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
-        Concat(ref es) => {
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
             for e in es {
-                if let StartText = *e {
+                if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() {
                     if !lits.is_empty() {
                         lits.cut();
                         break;
                     }
-                    lits.add(Lit::empty());
+                    lits.add(Literal::empty());
                     continue;
                 }
                 let mut lits2 = lits.to_empty();
@@ -650,83 +674,76 @@ fn prefixes(expr: &Expr, lits: &mut Literals) {
                 }
             }
         }
-        Alternate(ref es) => {
+        HirKind::Alternation(ref es) => {
             alternate_literals(es, lits, prefixes);
         }
         _ => lits.cut(),
     }
 }
 
-fn suffixes(expr: &Expr, lits: &mut Literals) {
-    use Expr::*;
-    match *expr {
-        Literal { ref chars, casei: false } => {
-            let s: String = chars.iter().cloned().collect();
-            let mut bytes = s.into_bytes();
-            bytes.reverse();
-            lits.cross_add(&bytes);
-        }
-        Literal { ref chars, casei: true } => {
-            for &c in chars.iter().rev() {
-                let cls = CharClass::new(vec![
-                    ClassRange { start: c, end: c },
-                ]).case_fold();
-                if !lits.add_char_class_reverse(&cls) {
-                    lits.cut();
-                    return;
-                }
-            }
+fn suffixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0u8; 4];
+            let i = unicode::encode_utf8(c, &mut buf).unwrap();
+            let mut buf = &mut buf[..i];
+            buf.reverse();
+            lits.cross_add(buf);
         }
-        LiteralBytes { ref bytes, casei: false } => {
-            let b: Vec<u8> = bytes.iter().rev().cloned().collect();
-            lits.cross_add(&b);
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
         }
-        LiteralBytes { ref bytes, casei: true } => {
-            for &b in bytes.iter().rev() {
-                let cls = ByteClass::new(vec![
-                    ByteRange { start: b, end: b },
-                ]).case_fold();
-                if !lits.add_byte_class(&cls) {
-                    lits.cut();
-                    return;
-                }
-            }
-        }
-        Class(ref cls) => {
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
             if !lits.add_char_class_reverse(cls) {
                 lits.cut();
             }
         }
-        ClassBytes(ref cls) => {
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
             if !lits.add_byte_class(cls) {
                 lits.cut();
             }
         }
-        Group { ref e, .. } => {
-            suffixes(&**e, lits);
-        }
-        Repeat { ref e, r: Repeater::ZeroOrOne, .. } => {
-            repeat_zero_or_one_literals(&**e, lits, suffixes);
-        }
-        Repeat { ref e, r: Repeater::ZeroOrMore, .. } => {
-            repeat_zero_or_more_literals(&**e, lits, suffixes);
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            suffixes(&**hir, lits);
         }
-        Repeat { ref e, r: Repeater::OneOrMore, .. } => {
-            repeat_one_or_more_literals(&**e, lits, suffixes);
-        }
-        Repeat { ref e, r: Repeater::Range { min, max }, greedy } => {
-            repeat_range_literals(&**e, min, max, greedy, lits, suffixes);
+        HirKind::Repetition(ref x) => {
+            match x.kind {
+                hir::RepetitionKind::ZeroOrOne => {
+                    repeat_zero_or_one_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::ZeroOrMore => {
+                    repeat_zero_or_more_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::OneOrMore => {
+                    repeat_one_or_more_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::Range(ref rng) => {
+                    let (min, max) = match *rng {
+                        hir::RepetitionRange::Exactly(m) => {
+                            (m, Some(m))
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            (m, None)
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            (m, Some(n))
+                        }
+                    };
+                    repeat_range_literals(
+                        &x.hir, min, max, x.greedy, lits, suffixes)
+                }
+            }
         }
-        Concat(ref es) if es.is_empty() => {}
-        Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
-        Concat(ref es) => {
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
             for e in es.iter().rev() {
-                if let EndText = *e {
+                if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() {
                     if !lits.is_empty() {
                         lits.cut();
                         break;
                     }
-                    lits.add(Lit::empty());
+                    lits.add(Literal::empty());
                     continue;
                 }
                 let mut lits2 = lits.to_empty();
@@ -740,15 +757,15 @@ fn suffixes(expr: &Expr, lits: &mut Literals) {
                 }
             }
         }
-        Alternate(ref es) => {
+        HirKind::Alternation(ref es) => {
             alternate_literals(es, lits, suffixes);
         }
         _ => lits.cut(),
     }
 }
 
-fn repeat_zero_or_one_literals<F: FnMut(&Expr, &mut Literals)>(
-    e: &Expr,
+fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
     lits: &mut Literals,
     mut f: F,
 ) {
@@ -760,14 +777,14 @@ fn repeat_zero_or_one_literals<F: FnMut(&Expr, &mut Literals)>(
         lits.cut();
         return;
     }
-    lits2.add(Lit::empty());
+    lits2.add(Literal::empty());
     if !lits.union(lits2) {
         lits.cut();
     }
 }
 
-fn repeat_zero_or_more_literals<F: FnMut(&Expr, &mut Literals)>(
-    e: &Expr,
+fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
     lits: &mut Literals,
     mut f: F,
 ) {
@@ -780,14 +797,14 @@ fn repeat_zero_or_more_literals<F: FnMut(&Expr, &mut Literals)>(
         return;
     }
     lits2.cut();
-    lits2.add(Lit::empty());
+    lits2.add(Literal::empty());
     if !lits.union(lits2) {
         lits.cut();
     }
 }
 
-fn repeat_one_or_more_literals<F: FnMut(&Expr, &mut Literals)>(
-    e: &Expr,
+fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
     lits: &mut Literals,
     mut f: F,
 ) {
@@ -795,30 +812,28 @@ fn repeat_one_or_more_literals<F: FnMut(&Expr, &mut Literals)>(
     lits.cut();
 }
 
-fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
-    e: &Expr,
+fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
     min: u32,
     max: Option<u32>,
     greedy: bool,
     lits: &mut Literals,
     mut f: F,
 ) {
-    use Expr::*;
-
     if min == 0 {
         // This is a bit conservative. If `max` is set, then we could
         // treat this as a finite set of alternations. For now, we
         // just treat it as `e*`.
-        f(&Repeat {
-            e: Box::new(e.clone()),
-            r: Repeater::ZeroOrMore,
+        f(&Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
             greedy: greedy,
-        }, lits);
+            hir: Box::new(e.clone()),
+        }), lits);
     } else {
         if min > 0 {
             let n = cmp::min(lits.limit_size, min as usize);
             let es = iter::repeat(e.clone()).take(n).collect();
-            f(&Concat(es), lits);
+            f(&Hir::concat(es), lits);
             if n < min as usize || lits.contains_empty() {
                 lits.cut();
             }
@@ -829,8 +844,8 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
     }
 }
 
-fn alternate_literals<F: FnMut(&Expr, &mut Literals)>(
-    es: &[Expr],
+fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
+    es: &[Hir],
     lits: &mut Literals,
     mut f: F,
 ) {
@@ -863,15 +878,15 @@ impl fmt::Debug for Literals {
     }
 }
 
-impl Lit {
+impl Literal {
     /// Returns a new complete literal with the bytes given.
-    pub fn new(bytes: Vec<u8>) -> Lit {
-        Lit { v: bytes, cut: false }
+    pub fn new(bytes: Vec<u8>) -> Literal {
+        Literal { v: bytes, cut: false }
     }
 
     /// Returns a new complete empty literal.
-    pub fn empty() -> Lit {
-        Lit { v: vec![], cut: false }
+    pub fn empty() -> Literal {
+        Literal { v: vec![], cut: false }
     }
 
     /// Returns true if this literal was "cut."
@@ -885,19 +900,19 @@ impl Lit {
     }
 }
 
-impl PartialEq for Lit {
-    fn eq(&self, other: &Lit) -> bool {
+impl PartialEq for Literal {
+    fn eq(&self, other: &Literal) -> bool {
         self.v == other.v
     }
 }
 
-impl PartialOrd for Lit {
-    fn partial_cmp(&self, other: &Lit) -> Option<cmp::Ordering> {
+impl PartialOrd for Literal {
+    fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> {
         self.v.partial_cmp(&other.v)
     }
 }
 
-impl fmt::Debug for Lit {
+impl fmt::Debug for Literal {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         if self.is_cut() {
             write!(f, "Cut({})", escape_unicode(&self.v))
@@ -907,16 +922,16 @@ impl fmt::Debug for Lit {
     }
 }
 
-impl AsRef<[u8]> for Lit {
+impl AsRef<[u8]> for Literal {
     fn as_ref(&self) -> &[u8] { &self.v }
 }
 
-impl ops::Deref for Lit {
+impl ops::Deref for Literal {
     type Target = Vec<u8>;
     fn deref(&self) -> &Vec<u8> { &self.v }
 }
 
-impl ops::DerefMut for Lit {
+impl ops::DerefMut for Literal {
     fn deref_mut(&mut self) -> &mut Vec<u8> { &mut self.v }
 }
 
@@ -972,28 +987,44 @@ fn escape_byte(byte: u8) -> String {
     String::from_utf8_lossy(&escaped).into_owned()
 }
 
+/// Sums the sizes of the inclusive codepoint ranges in `cls`.
+fn cls_char_count(cls: &hir::ClassUnicode) -> usize {
+    let n: u32 = cls.iter().map(|&r| 1 + r.end as u32 - r.start as u32).sum();
+    n as usize
+}
+
+/// Sums the sizes of the inclusive byte ranges in `cls`.
+fn cls_byte_count(cls: &hir::ClassBytes) -> usize {
+    let n: u32 = cls.iter().map(|&r| 1 + r.end as u32 - r.start as u32).sum();
+    n as usize
+}
+
 #[cfg(test)]
 mod tests {
     use std::fmt;
 
-    use {Expr, ExprBuilder};
-    use super::{Literals, Lit, escape_bytes};
+    use ParserBuilder;
+    use hir::Hir;
+    use super::{Literals, Literal, escape_bytes};
 
     // To make test failures easier to read.
     #[derive(Debug, Eq, PartialEq)]
-    struct Bytes(Vec<ULit>);
+    struct Bytes(Vec<ULiteral>);
     #[derive(Debug, Eq, PartialEq)]
-    struct Unicode(Vec<ULit>);
+    struct Unicode(Vec<ULiteral>);
 
-    fn escape_lits(blits: &[Lit]) -> Vec<ULit> {
+    fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> {
         let mut ulits = vec![];
         for blit in blits {
-            ulits.push(ULit { v: escape_bytes(&blit), cut: blit.is_cut() });
+            ulits.push(ULiteral {
+                v: escape_bytes(&blit),
+                cut: blit.is_cut(),
+            });
         }
         ulits
     }
 
-    fn create_lits<I: IntoIterator<Item=Lit>>(it: I) -> Literals {
+    fn create_lits<I: IntoIterator<Item=Literal>>(it: I) -> Literals {
         Literals {
             lits: it.into_iter().collect(),
             limit_size: 0,
@@ -1003,16 +1034,16 @@ mod tests {
 
     // Needs to be pub for 1.3?
     #[derive(Clone, Eq, PartialEq)]
-    pub struct ULit {
+    pub struct ULiteral {
         v: String,
         cut: bool,
     }
 
-    impl ULit {
+    impl ULiteral {
         fn is_cut(&self) -> bool { self.cut }
     }
 
-    impl fmt::Debug for ULit {
+    impl fmt::Debug for ULiteral {
         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
             if self.is_cut() {
                 write!(f, "Cut({})", self.v)
@@ -1022,34 +1053,38 @@ mod tests {
         }
     }
 
-    impl PartialEq<Lit> for ULit {
-        fn eq(&self, other: &Lit) -> bool {
+    impl PartialEq<Literal> for ULiteral {
+        fn eq(&self, other: &Literal) -> bool {
             self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut()
         }
     }
 
-    impl PartialEq<ULit> for Lit {
-        fn eq(&self, other: &ULit) -> bool {
+    impl PartialEq<ULiteral> for Literal {
+        fn eq(&self, other: &ULiteral) -> bool {
             &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut()
         }
     }
 
     #[allow(non_snake_case)]
-    fn C(s: &'static str) -> ULit { ULit { v: s.to_owned(), cut: true } }
+    fn C(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: true }
+    }
     #[allow(non_snake_case)]
-    fn M(s: &'static str) -> ULit { ULit { v: s.to_owned(), cut: false } }
+    fn M(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: false }
+    }
 
-    fn prefixes(lits: &mut Literals, expr: &Expr) {
+    fn prefixes(lits: &mut Literals, expr: &Hir) {
         lits.union_prefixes(expr);
     }
 
-    fn suffixes(lits: &mut Literals, expr: &Expr) {
+    fn suffixes(lits: &mut Literals, expr: &Hir) {
         lits.union_suffixes(expr);
     }
 
     macro_rules! assert_lit_eq {
         ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{
-            let expected: Vec<ULit> = vec![$($expected_lit),*];
+            let expected: Vec<ULiteral> = vec![$($expected_lit),*];
             let lits = $got_lits;
             assert_eq!(
                 $which(expected.clone()),
@@ -1070,13 +1105,20 @@ mod tests {
         ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
             #[test]
             fn $name() {
-                let expr = Expr::parse($re).unwrap();
-                let lits = expr.$which();
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
                 assert_lit_eq!(Unicode, lits, $($lit),*);
 
-                let expr = ExprBuilder::new().allow_bytes(true).unicode(false)
-                                       .parse($re).unwrap();
-                let lits = expr.$which();
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
                 assert_lit_eq!(Bytes, lits, $($lit),*);
             }
         };
@@ -1201,14 +1243,21 @@ mod tests {
         ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
             #[test]
             fn $name() {
-                let expr = Expr::parse($re).unwrap();
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
                 let mut lits = Literals::empty();
                 lits.set_limit_size(20).set_limit_class(10);
                 $which(&mut lits, &expr);
                 assert_lit_eq!(Unicode, lits, $($lit),*);
 
-                let expr = ExprBuilder::new().allow_bytes(true).unicode(false)
-                                       .parse($re).unwrap();
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
                 let mut lits = Literals::empty();
                 lits.set_limit_size(20).set_limit_class(10);
                 $which(&mut lits, &expr);
@@ -1357,12 +1406,12 @@ mod tests {
         ($name:ident, $given:expr, $expected:expr) => {
             #[test]
             fn $name() {
-                let given: Vec<Lit> =
+                let given: Vec<Literal> =
                     $given
                     .into_iter()
                     .map(|ul| {
                         let cut = ul.is_cut();
-                        Lit { v: ul.v.into_bytes(), cut: cut }
+                        Literal { v: ul.v.into_bytes(), cut: cut }
                     })
                     .collect();
                 let lits = create_lits(given);
@@ -1410,12 +1459,12 @@ mod tests {
         ($name:ident, $trim:expr, $given:expr, $expected:expr) => {
             #[test]
             fn $name() {
-                let given: Vec<Lit> =
+                let given: Vec<Literal> =
                     $given
                     .into_iter()
                     .map(|ul| {
                         let cut = ul.is_cut();
-                        Lit { v: ul.v.into_bytes(), cut: cut }
+                        Literal { v: ul.v.into_bytes(), cut: cut }
                     })
                     .collect();
                 let lits = create_lits(given);
@@ -1438,10 +1487,10 @@ mod tests {
         ($name:ident, $given:expr, $expected:expr) => {
             #[test]
             fn $name() {
-                let given: Vec<Lit> =
+                let given: Vec<Literal> =
                     $given
                     .into_iter()
-                    .map(|s: &str| Lit {
+                    .map(|s: &str| Literal {
                         v: s.to_owned().into_bytes(),
                         cut: false,
                     })
@@ -1474,10 +1523,10 @@ mod tests {
         ($name:ident, $given:expr, $expected:expr) => {
             #[test]
             fn $name() {
-                let given: Vec<Lit> =
+                let given: Vec<Literal> =
                     $given
                     .into_iter()
-                    .map(|s: &str| Lit {
+                    .map(|s: &str| Literal {
                         v: s.to_owned().into_bytes(),
                         cut: false,
                     })
diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs
new file mode 100644
index 0000000000..d443c538b3
--- /dev/null
+++ b/regex-syntax/src/hir/mod.rs
@@ -0,0 +1,2040 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines a high-level intermediate representation for regular expressions.
+*/
+use std::char;
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::u8;
+
+use ast::Span;
+use hir::interval::{Interval, IntervalSet, IntervalSetIter};
+use unicode;
+
+pub use hir::visitor::{Visitor, visit};
+
+mod interval;
+pub mod literal;
+pub mod translate;
+mod visitor;
+
+/// An error that can occur while translating an `Ast` to a `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error that occurred.
+    kind: ErrorKind,
+    /// The original pattern that the translator's `Ast` was parsed from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error, derived from the `Ast` given to the translator.
+    span: Span,
+}
+
+impl Error {
+    /// Returns the kind of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// Returns the original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Returns the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+}
+
+/// The type of an error that occurred while building an `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// This error occurs when a Unicode feature is used when Unicode
+    /// support is disabled. For example `(?-u:\pL)` would trigger this error.
+    UnicodeNotAllowed,
+    /// This error occurs when translating a pattern that could match a byte
+    /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
+    InvalidUtf8,
+    /// This error occurs when a Unicode property name is used that is not
+    /// recognized.
+    UnicodePropertyNotFound,
+    /// This error occurs when a Unicode property value is used that is not
+    /// recognized.
+    UnicodePropertyValueNotFound,
+    /// This error occurs when the translator attempts to construct a character
+    /// class that is empty.
+    ///
+    /// Note that this restriction in the translator may be removed in the
+    /// future.
+    EmptyClassNotAllowed,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl ErrorKind {
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match *self {
+            UnicodeNotAllowed => "Unicode not allowed here",
+            InvalidUtf8 => "pattern can match invalid UTF-8",
+            UnicodePropertyNotFound => "Unicode property not found",
+            UnicodePropertyValueNotFound => "Unicode property value not found",
+            EmptyClassNotAllowed => "empty character classes are not allowed",
+            __Nonexhaustive => unreachable!(), // keeps this match exhaustive
+        }
+    }
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        self.kind().description()
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.description())
+    }
+}
+
+/// A high-level intermediate representation (HIR) for a regular expression.
+///
+/// The HIR of a regular expression represents an intermediate step between its
+/// abstract syntax (a structured description of the concrete syntax) and
+/// compiled byte codes. The purpose of HIR is to make regular expressions
+/// easier to analyze. In particular, the AST is much more complex than the
+/// HIR. For example, while an AST supports arbitrarily nested character
+/// classes, the HIR will flatten all nested classes into a single set. The HIR
+/// will also "compile away" every flag present in the concrete syntax. For
+/// example, users of HIR expressions never need to worry about case folding;
+/// it is handled automatically by the translator (e.g., by translating `(?i)A`
+/// to `[aA]`).
+///
+/// If the HIR was produced by a translator that disallows invalid UTF-8, then
+/// the HIR is guaranteed to match UTF-8 exclusively.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the HIR.
+///
+/// The specific type of an HIR expression can be accessed via its `kind`
+/// or `into_kind` methods. This extra level of indirection exists for two
+/// reasons:
+///
+/// 1. Construction of an HIR expression *must* use the constructor methods
+///    on this `Hir` type instead of building the `HirKind` values directly.
+///    This permits construction to enforce invariants like "concatenations
+///    always consist of two or more sub-expressions."
+/// 2. Every HIR expression contains attributes that are defined inductively,
+///    and can be computed cheaply during the construction process. For
+///    example, one such attribute is whether the expression must match at the
+///    beginning of the text.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Hir {
+    /// The underlying HIR kind.
+    kind: HirKind,
+    /// Analysis info about this HIR, computed during construction.
+    info: HirInfo,
+}
+
+/// The kind of an arbitrary `Hir` expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+    /// The empty regular expression, which matches everything, including the
+    /// empty string.
+    Empty,
+    /// A single literal character or byte that matches exactly itself.
+    Literal(Literal),
+    /// A single character class that matches any of the characters in the
+    /// class. A class can either consist of Unicode scalar values as
+    /// characters, or it can use bytes.
+    Class(Class),
+    /// An anchor assertion. An anchor assertion match always has zero length.
+    Anchor(Anchor),
+    /// A word boundary assertion, which may or may not be Unicode aware. A
+    /// word boundary assertion match always has zero length.
+    WordBoundary(WordBoundary),
+    /// A repetition operation applied to a child expression.
+    Repetition(Repetition),
+    /// A possibly capturing group, which contains a child expression.
+    Group(Group),
+    /// A concatenation of expressions. A concatenation always has at least two
+    /// child expressions.
+    ///
+    /// A concatenation matches only if each of its child expressions matches
+    /// one after the other.
+    Concat(Vec<Hir>),
+    /// An alternation of expressions. An alternation always has at least two
+    /// child expressions.
+    ///
+    /// An alternation matches only if at least one of its child expressions
+    /// matches. If multiple expressions match, then the leftmost is preferred.
+    Alternation(Vec<Hir>),
+}
+
+impl Hir {
+    /// Returns a reference to the underlying HIR kind.
+    pub fn kind(&self) -> &HirKind {
+        &self.kind
+    }
+
+    /// Consumes this HIR expression and returns its underlying `HirKind`,
+    /// moving it out by swapping `HirKind::Empty` into its place.
+    pub fn into_kind(mut self) -> HirKind {
+        use std::mem;
+        mem::replace(&mut self.kind, HirKind::Empty)
+    }
+
+    /// Returns an empty HIR expression.
+    ///
+    /// An empty HIR expression always matches, including the empty string.
+    pub fn empty() -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        Hir {
+            kind: HirKind::Empty,
+            info: info,
+        }
+    }
+
+    /// Creates a literal HIR expression.
+    ///
+    /// If the given literal has a `Byte` variant with an ASCII byte, then this
+    /// method panics. This enforces the invariant that `Byte` variants are
+    /// only used to express matching of invalid UTF-8.
+    pub fn literal(lit: Literal) -> Hir {
+        if let Literal::Byte(b) = lit {
+            assert!(b > 0x7F);
+        }
+
+        let mut info = HirInfo::new();
+        info.set_always_utf8(lit.is_unicode());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        Hir {
+            kind: HirKind::Literal(lit),
+            info: info,
+        }
+    }
+
+    /// Creates a class HIR expression.
+    pub fn class(class: Class) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(class.is_always_utf8());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        Hir {
+            kind: HirKind::Class(class),
+            info: info,
+        }
+    }
+
+    /// Creates an anchor assertion HIR expression.
+    pub fn anchor(anchor: Anchor) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        if let Anchor::StartText = anchor {
+            info.set_anchored_start(true);
+            info.set_any_anchored_start(true);
+        }
+        if let Anchor::EndText = anchor {
+            info.set_anchored_end(true);
+            info.set_any_anchored_end(true);
+        }
+        Hir {
+            kind: HirKind::Anchor(anchor),
+            info: info,
+        }
+    }
+
+    /// Creates a word boundary assertion HIR expression.
+    pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        // A negated word boundary matches the empty string, but a normal
+        // word boundary does not!
+        info.set_match_empty(word_boundary.is_negated());
+        // ASCII word boundaries can match invalid UTF-8.
+        if let WordBoundary::Ascii = word_boundary {
+            info.set_always_utf8(false);
+        }
+        if let WordBoundary::AsciiNegate = word_boundary {
+            info.set_always_utf8(false);
+        }
+        Hir {
+            kind: HirKind::WordBoundary(word_boundary),
+            info: info,
+        }
+    }
+
+    /// Creates a repetition HIR expression.
+    pub fn repetition(rep: Repetition) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(rep.hir.is_always_utf8());
+        info.set_all_assertions(rep.hir.is_all_assertions());
+        // If this operator can match the empty string, then it can never
+        // be anchored.
+        info.set_anchored_start(
+            !rep.is_match_empty() && rep.hir.is_anchored_start()
+        );
+        info.set_anchored_end(
+            !rep.is_match_empty() && rep.hir.is_anchored_end()
+        );
+        info.set_any_anchored_start(rep.hir.is_any_anchored_start());
+        info.set_any_anchored_end(rep.hir.is_any_anchored_end());
+        info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
+        Hir {
+            kind: HirKind::Repetition(rep),
+            info: info,
+        }
+    }
+
+    /// Creates a group HIR expression.
+    pub fn group(group: Group) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(group.hir.is_always_utf8());
+        info.set_all_assertions(group.hir.is_all_assertions());
+        info.set_anchored_start(group.hir.is_anchored_start());
+        info.set_anchored_end(group.hir.is_anchored_end());
+        info.set_any_anchored_start(group.hir.is_any_anchored_start());
+        info.set_any_anchored_end(group.hir.is_any_anchored_end());
+        info.set_match_empty(group.hir.is_match_empty());
+        Hir {
+            kind: HirKind::Group(group),
+            info: info,
+        }
+    }
+
+    /// Returns the concatenation of the given expressions.
+    ///
+    /// An empty list yields `Hir::empty()`; one expression is returned as-is.
+    pub fn concat(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(true);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x =
+                        info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end()
+                        || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() && e.is_match_empty();
+                    info.set_match_empty(x);
+                }
+                // Anchored attributes require something slightly more
+                // sophisticated. Normally, WLOG, to determine whether an
+                // expression is anchored to the start, we'd only need to check
+                // the first expression of a concatenation. However,
+                // expressions like `$\b^` are still anchored to the start,
+                // but the first expression in the concatenation *isn't*
+                // anchored to the start. So the "first" expression to look at
+                // is actually one that is either not an assertion or is
+                // specifically the StartText assertion.
+                info.set_anchored_start(
+                    exprs.iter()
+                        .take_while(|e| {
+                            e.is_anchored_start() || e.is_all_assertions()
+                        })
+                        .any(|e| {
+                            e.is_anchored_start()
+                        }));
+                // Similarly for the end anchor, but in reverse.
+                info.set_anchored_end(
+                    exprs.iter()
+                        .rev()
+                        .take_while(|e| {
+                            e.is_anchored_end() || e.is_all_assertions()
+                        })
+                        .any(|e| {
+                            e.is_anchored_end()
+                        }));
+                Hir {
+                    kind: HirKind::Concat(exprs),
+                    info: info,
+                }
+            }
+        }
+    }
+
+    /// Returns the alternation of the given expressions.
+    ///
+    /// An empty list yields `Hir::empty()`; one expression is returned as-is.
+    pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_anchored_start(true);
+                info.set_anchored_end(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(false);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x = info.is_anchored_start() && e.is_anchored_start();
+                    info.set_anchored_start(x);
+
+                    let x = info.is_anchored_end() && e.is_anchored_end();
+                    info.set_anchored_end(x);
+
+                    let x =
+                        info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end()
+                        || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() || e.is_match_empty();
+                    info.set_match_empty(x);
+                }
+                Hir {
+                    kind: HirKind::Alternation(exprs),
+                    info: info,
+                }
+            }
+        }
+    }
+
+    /// Build an HIR expression for `.`.
+    ///
+    /// A `.` expression matches any character except for `\n`. To build an
+    /// expression that matches any character, including `\n`, use the `any`
+    /// method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
+    pub fn dot(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\x09')); // below `\n`
+            cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); // above `\n`
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\x09')); // below `\n`
+            cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); // above `\n`
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Build an HIR expression for `(?s).`.
+    ///
+    /// A `(?s).` expression matches any character, including `\n`. To build an
+    /// expression that matches any character except for `\n`, use the `dot`
+    /// method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
+    pub fn any(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Return true if and only if this HIR will always match valid UTF-8.
+    ///
+    /// When this returns false, then it is possible for this HIR expression
+    /// to match invalid UTF-8.
+    pub fn is_always_utf8(&self) -> bool {
+        self.info.is_always_utf8()
+    }
+
+    /// Returns true if and only if this entire HIR expression is made up of
+    /// zero-width assertions.
+    ///
+    /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
+    /// not `^a`.
+    pub fn is_all_assertions(&self) -> bool {
+        self.info.is_all_assertions()
+    }
+
+    /// Return true if and only if this HIR is required to match from the
+    /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
+    /// `^foo|^bar` but not `^foo|bar`.
+    pub fn is_anchored_start(&self) -> bool {
+        self.info.is_anchored_start()
+    }
+
+    /// Return true if and only if this HIR is required to match at the end
+    /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
+    /// `foo$|bar$` but not `foo$|bar`.
+    pub fn is_anchored_end(&self) -> bool {
+        self.info.is_anchored_end()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that
+    /// is required to match at the beginning of text. Specifically, this
+    /// returns true if the `^` symbol (when multiline mode is disabled) or the
+    /// `\A` escape appear anywhere in the regex.
+    pub fn is_any_anchored_start(&self) -> bool {
+        self.info.is_any_anchored_start()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that is
+    /// required to match at the end of text. Specifically, this returns true
+    /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
+    /// appear anywhere in the regex.
+    pub fn is_any_anchored_end(&self) -> bool {
+        self.info.is_any_anchored_end()
+    }
+
+    /// Return true if and only if the empty string is part of the language
+    /// matched by this regular expression.
+    ///
+    /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
+    /// but not `a`, `a+` or `\b`.
+    pub fn is_match_empty(&self) -> bool {
+        self.info.is_match_empty()
+    }
+}
+
+impl HirKind {
+    /// Return true if and only if this kind is exactly the `Empty` variant.
+    ///
+    /// This test is shallow, not inductive. For the inductive definition, use
+    /// the `is_match_empty` method on [`Hir`](struct.Hir.html).
+    pub fn is_empty(&self) -> bool {
+        if let HirKind::Empty = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Reports whether this kind is one of the variants that carry child
+    /// expressions (`Repetition`, `Group`, `Alternation` or `Concat`).
+    pub fn has_subexprs(&self) -> bool {
+        match *self {
+            HirKind::Repetition(_)
+            | HirKind::Group(_)
+            | HirKind::Alternation(_)
+            | HirKind::Concat(_) => true,
+            HirKind::WordBoundary(_)
+            | HirKind::Anchor(_)
+            | HirKind::Class(_)
+            | HirKind::Literal(_)
+            | HirKind::Empty => false,
+        }
+    }
+}
+
+/// The high-level intermediate representation of a literal.
+///
+/// A literal corresponds to a single character, where a character is either
+/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
+/// are preferred whenever possible. In particular, a `Byte` variant is only
+/// ever produced when it could match invalid UTF-8.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Literal {
+    /// A single character represented by a Unicode scalar value.
+    Unicode(char),
+    /// A single byte; `Hir::literal` only accepts values above `0x7F` here.
+    Byte(u8),
+}
+
+impl Literal {
+    /// Reports whether this literal denotes a Unicode scalar value: true for
+    /// every `Unicode` variant and for a `Byte` variant holding an ASCII byte.
+    pub fn is_unicode(&self) -> bool {
+        if let Literal::Byte(byte) = *self {
+            byte <= 0x7F
+        } else {
+            true
+        }
+    }
+}
+
+/// The high-level intermediate representation of a character class.
+///
+/// A character class corresponds to a set of characters. A character is either
+/// defined by a Unicode scalar value or a byte. Unicode characters are used
+/// by default, while bytes are used when Unicode mode (via the `u` flag) is
+/// disabled.
+///
+/// A character class, regardless of its character type, is represented by a
+/// sequence of non-overlapping, non-adjacent ranges of characters.
+///
+/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
+/// be produced even when it exclusively matches valid UTF-8. This is because
+/// a `Bytes` variant represents an intention by the author of the regular
+/// expression to disable Unicode mode, which in turn impacts the semantics of
+/// case-insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
+/// match the same set of strings.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A set of characters represented by Unicode scalar values.
+    Unicode(ClassUnicode),
+    /// A set of characters represented by arbitrary bytes (one byte per
+    /// character).
+    Bytes(ClassBytes),
+}
+
+impl Class {
+    /// Expand this character class, in place, with Unicode simple case
+    /// folding, so that it also contains every simple case folded variant of
+    /// its characters.
+    ///
+    /// For a byte oriented character class, folding is limited to the ASCII
+    /// ranges `A-Z` and `a-z`.
+    pub fn case_fold_simple(&mut self) {
+        match *self {
+            Class::Bytes(ref mut bytes) => bytes.case_fold_simple(),
+            Class::Unicode(ref mut unicode) => unicode.case_fold_simple(),
+        }
+    }
+
+    /// Negate this character class in place.
+    ///
+    /// Once this returns, the class contains exactly those characters that
+    /// it did not contain before the call.
+    pub fn negate(&mut self) {
+        match *self {
+            Class::Bytes(ref mut bytes) => bytes.negate(),
+            Class::Unicode(ref mut unicode) => unicode.negate(),
+        }
+    }
+
+    /// Reports whether this character class can only ever match valid UTF-8.
+    ///
+    /// Unicode classes always qualify; a byte class qualifies only when all of
+    /// its bytes are ASCII. A class can match invalid UTF-8 only when both:
+    ///
+    /// 1. The translator was configured to permit generating an expression
+    ///    that can match invalid UTF-8. (By default, this is disabled.)
+    /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
+    ///    syntax or in the parser builder. By default, Unicode mode is
+    ///    enabled.
+    pub fn is_always_utf8(&self) -> bool {
+        if let Class::Bytes(ref bytes) = *self {
+            bytes.is_all_ascii()
+        } else {
+            true
+        }
+    }
+}
+
+/// A set of characters represented by Unicode scalar values.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    set: IntervalSet<ClassUnicodeRange>, // the ranges making up this class
+}
+
+impl ClassUnicode {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassUnicode
+    where I: IntoIterator<Item=ClassUnicodeRange>
+    {
+        ClassUnicode { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassUnicode {
+        ClassUnicode::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassUnicodeRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassUnicodeIter {
+        ClassUnicodeIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassUnicodeRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple();
+    }
+
+    /// Negate this character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
+    /// set, then it will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this character class with the given character class, in place.
+    pub fn union(&mut self, other: &ClassUnicode) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this character class with the given character class, in
+    /// place.
+    pub fn intersect(&mut self, other: &ClassUnicode) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given character class from this character class, in place.
+    pub fn difference(&mut self, other: &ClassUnicode) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given character classes, in
+    /// place.
+    ///
+    /// This computes the symmetric difference of two character classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
+        self.set.symmetric_difference(&other.set);
+    }
+}
+
+/// An iterator over all ranges in a `ClassUnicode` (see `ClassUnicode::iter`).
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
+
+impl<'a> Iterator for ClassUnicodeIter<'a> {
+    type Item = &'a ClassUnicodeRange;
+
+    fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
+        self.0.next() // forwarded to the wrapped `IntervalSetIter`
+    }
+}
+
+/// A single range of characters represented by Unicode scalar values.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassUnicodeRange {
+    start: char, // inclusive lower bound
+    end: char, // inclusive upper bound
+}
+
+impl fmt::Debug for ClassUnicodeRange {
+    /// Formats both bounds as strings: the character itself, or the `0x`
+    /// prefixed uppercase hex of its code point when the character is
+    /// whitespace or a control character.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let render = |c: char| -> String {
+            if c.is_whitespace() || c.is_control() {
+                format!("0x{:X}", c as u32)
+            } else {
+                c.to_string()
+            }
+        };
+        // Render each bound, then emit them as the two struct fields.
+        let (start, end) = (render(self.start), render(self.end));
+        let mut debug = f.debug_struct("ClassUnicodeRange");
+        debug.field("start", &start);
+        debug.field("end", &end);
+        debug.finish()
+    }
+}
+
+impl Interval for ClassUnicodeRange {
+    type Bound = char;
+
+    #[inline] fn lower(&self) -> char { self.start }
+    #[inline] fn upper(&self) -> char { self.end }
+    #[inline] fn set_lower(&mut self, bound: char) { self.start = bound; }
+    #[inline] fn set_upper(&mut self, bound: char) { self.end = bound; }
+
+    /// Apply simple case folding to this Unicode scalar value range.
+    ///
+    /// Each folded character is appended to `ranges` as a single-character
+    /// range. Canonical ordering is *not* maintained in the given vector.
+    fn case_fold_simple(&self, ranges: &mut Vec<ClassUnicodeRange>) {
+        if !unicode::contains_simple_case_mapping(self.start, self.end) {
+            return;
+        }
+        let start = self.start as u32;
+        let end = (self.end as u32).saturating_add(1); // exclusive bound
+        let mut next_simple_cp = None;
+        for cp in (start..end).filter_map(char::from_u32) {
+            if next_simple_cp.map_or(false, |next| cp < next) {
+                continue; // still below the code point stored from an `Err`
+            }
+            let it = match unicode::simple_fold(cp) {
+                Ok(it) => it,
+                Err(next) => {
+                    // NOTE(review): presumably the next mapped code point.
+                    next_simple_cp = next;
+                    continue;
+                }
+            };
+            for cp_folded in it {
+                ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
+            }
+        }
+    }
+}
+
+impl ClassUnicodeRange {
+    /// Create a new Unicode scalar value range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: char, end: char) -> ClassUnicodeRange {
+        ClassUnicodeRange::create(start, end) // presumably `Interval::create`
+    }
+
+    /// Return the start (inclusive lower bound) of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> char {
+        self.start
+    }
+
+    /// Return the end (inclusive upper bound) of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> char {
+        self.end
+    }
+}
+
+/// A set of characters represented by arbitrary bytes (where one byte
+/// corresponds to one character).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBytes {
+    set: IntervalSet<ClassBytesRange>, // the ranges making up this class
+}
+
+impl ClassBytes {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassBytes
+    where I: IntoIterator<Item=ClassBytesRange>
+    {
+        ClassBytes { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassBytes {
+        ClassBytes::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassBytesRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassBytesIter {
+        ClassBytesIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassBytesRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// Note that this only applies ASCII case folding, which is limited to the
+    /// characters `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple();
+    }
+
+    /// Negate this byte class.
+    ///
+    /// For all `b` where `b` is any byte, if `b` was in this set, then it
+    /// will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this byte class with the given byte class, in place.
+    pub fn union(&mut self, other: &ClassBytes) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this byte class with the given byte class, in place.
+    pub fn intersect(&mut self, other: &ClassBytes) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given byte class from this byte class, in place.
+    pub fn difference(&mut self, other: &ClassBytes) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given byte classes, in place.
+    ///
+    /// This computes the symmetric difference of two byte classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII byte.
+    pub fn is_all_ascii(&self) -> bool {
+        self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
+    }
+}
+
+/// An iterator over all ranges in a `ClassBytes` (see `ClassBytes::iter`).
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
+
+impl<'a> Iterator for ClassBytesIter<'a> {
+    type Item = &'a ClassBytesRange;
+
+    fn next(&mut self) -> Option<&'a ClassBytesRange> {
+        self.0.next() // forwarded to the wrapped `IntervalSetIter`
+    }
+}
+
+/// A single range of characters represented by arbitrary bytes.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassBytesRange {
+    start: u8, // inclusive lower bound
+    end: u8, // inclusive upper bound
+}
+
+impl Interval for ClassBytesRange {
+    type Bound = u8;
+
+    #[inline] fn lower(&self) -> u8 { self.start }
+    #[inline] fn upper(&self) -> u8 { self.end }
+    #[inline] fn set_lower(&mut self, bound: u8) { self.start = bound; }
+    #[inline] fn set_upper(&mut self, bound: u8) { self.end = bound; }
+
+    /// Apply simple case folding to this byte range. Only ASCII case mappings
+    /// (between `a-z` and `A-Z`) are applied.
+    ///
+    /// Additional ranges are appended to the given vector. Canonical ordering
+    /// is *not* maintained in the given vector.
+    fn case_fold_simple(&self, ranges: &mut Vec<ClassBytesRange>) {
+        if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'a');
+            let upper = cmp::min(self.end, b'z');
+            ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); // 32 == b'a' - b'A'
+        }
+        if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'A');
+            let upper = cmp::min(self.end, b'Z');
+            ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); // 32 == b'a' - b'A'
+        }
+    }
+}
+
+impl ClassBytesRange {
+    /// Create a new byte range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: u8, end: u8) -> ClassBytesRange {
+        ClassBytesRange::create(start, end) // presumably `Interval::create`
+    }
+
+    /// Return the start (inclusive lower bound) of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> u8 {
+        self.start
+    }
+
+    /// Return the end (inclusive upper bound) of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> u8 {
+        self.end
+    }
+}
+
+impl fmt::Debug for ClassBytesRange {
+    /// Formats each bound as a `char` when it is an ASCII byte and as a
+    /// plain `u8` number otherwise.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut out = f.debug_struct("ClassBytesRange");
+        // Fields are emitted in declaration order: `start`, then `end`.
+        for &(name, byte) in &[("start", self.start), ("end", self.end)] {
+            if byte > 0x7F {
+                out.field(name, &byte);
+            } else {
+                out.field(name, &(byte as char));
+            }
+        }
+        out.finish()
+    }
+}
+
+/// The high-level intermediate representation for an anchor assertion.
+///
+/// A matching anchor assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Anchor {
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following a `\n` character.
+    StartLine,
+    /// Match the end of a line or the end of text. Specifically,
+    /// this matches at the end position of the input, or at the position
+    /// immediately preceding a `\n` character.
+    EndLine,
+    /// Match the beginning of text, i.e., the starting position of the input.
+    /// `Hir::anchor` sets the anchored-start flags for this variant only.
+    StartText,
+    /// Match the end of text, i.e., the ending position of the input.
+    /// `Hir::anchor` sets the anchored-end flags for this variant only.
+    EndText,
+}
+
+/// The high-level intermediate representation for a word-boundary assertion.
+///
+/// A matching word boundary assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum WordBoundary {
+    /// Match a Unicode-aware word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Unicode,
+    /// Match any position that is not a Unicode-aware word boundary.
+    UnicodeNegate,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Ascii,
+    /// Match any position that is not an ASCII-only word boundary.
+    AsciiNegate,
+}
+
+impl WordBoundary {
+    /// Reports whether this assertion is one of the negated variants.
+    pub fn is_negated(&self) -> bool {
+        match *self {
+            WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
+            WordBoundary::Unicode | WordBoundary::Ascii => false,
+        }
+    }
+}
+
+/// The high-level intermediate representation for a group.
+///
+/// This represents one of three possible group types:
+///
+/// 1. A non-capturing group (e.g., `(?:expr)`).
+/// 2. A capturing group (e.g., `(expr)`).
+/// 3. A named capturing group (e.g., `(?P<name>expr)`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The kind of this group. If it is a capturing group, then the kind
+    /// contains the capture group index (and the name, if it is a named
+    /// group).
+    pub kind: GroupKind,
+    /// The expression inside the group (capturing or not), which may be empty.
+    pub hir: Box<Hir>,
+}
+
+/// The kind of a group: unnamed capture, named capture or non-capturing.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// A normal unnamed capturing group.
+    ///
+    /// The value is the capture index of the group.
+    CaptureIndex(u32),
+    /// A named capturing group, which carries a capture index as well.
+    CaptureName {
+        /// The name of the group.
+        name: String,
+        /// The capture index of the group.
+        index: u32,
+    },
+    /// A non-capturing group, which has neither an index nor a name.
+    NonCapturing,
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator permits the repetition of an arbitrary
+/// sub-expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The kind of this repetition operator, i.e., how many repeats it allows.
+    pub kind: RepetitionKind,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub greedy: bool,
+    /// The sub-expression being repeated.
+    pub hir: Box<Hir>,
+}
+
+impl Repetition {
+    /// Returns true if and only if the repetition operator itself permits a
+    /// match of the empty string, i.e., its minimum repeat count is zero.
+    ///
+    /// This is deliberately not inductive: the repeated sub-expression is
+    /// never inspected. `a*` reports `true`, but `()+` reports `false` even
+    /// though one or more occurrences of `()` always match the empty string.
+    /// For the inductive definition, see the corresponding method on
+    /// [`Hir`](struct.Hir.html).
+    pub fn is_match_empty(&self) -> bool {
+        match self.kind {
+            RepetitionKind::ZeroOrOne | RepetitionKind::ZeroOrMore => true,
+            RepetitionKind::OneOrMore => false,
+            RepetitionKind::Range(ref range) => match *range {
+                RepetitionRange::Exactly(min)
+                | RepetitionRange::AtLeast(min)
+                | RepetitionRange::Bounded(min, _) => min == 0,
+            },
+        }
+    }
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// Matches a sub-expression zero or one times.
+    ZeroOrOne,
+    /// Matches a sub-expression zero or more times.
+    ZeroOrMore,
+    /// Matches a sub-expression one or more times.
+    OneOrMore,
+    /// Matches a sub-expression a counted number of times; the count may be
+    Range(RepetitionRange), // exact, a minimum only, or a minimum and maximum
+}
+
+/// The kind of a counted repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// Matches a sub-expression exactly `m` times.
+    Exactly(u32),
+    /// Matches a sub-expression at least `m` times, with no upper limit.
+    AtLeast(u32),
+    /// Matches a sub-expression at least `m` times and at most `n` times.
+    Bounded(u32, u32), // fields are `(m, n)`, in that order
+}
+
+/// A custom `Drop` impl is used for `Hir` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
+impl Drop for Hir {
+    fn drop(&mut self) {
+        use std::mem;
+        // Fast path: with no nested sub-expressions there is nothing to unwind.
+        match *self.kind() {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_) => return,
+            HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Concat(ref x) if x.is_empty() => return,
+            HirKind::Alternation(ref x) if x.is_empty() => return,
+            _ => {}
+        }
+        // Move `self` onto an explicit stack, leaving an empty `Hir` behind.
+        let mut stack = vec![mem::replace(self, Hir::empty())];
+        while let Some(mut expr) = stack.pop() {
+            match expr.kind {
+                HirKind::Empty
+                | HirKind::Literal(_)
+                | HirKind::Class(_)
+                | HirKind::Anchor(_)
+                | HirKind::WordBoundary(_) => {}
+                HirKind::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty())); // detach the child
+                }
+                HirKind::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty())); // detach the child
+                }
+                HirKind::Concat(ref mut x) => {
+                    stack.extend(x.drain(..)); // children move out; `x` is left empty
+                }
+                HirKind::Alternation(ref mut x) => {
+                    stack.extend(x.drain(..)); // children move out; `x` is left empty
+                }
+            }
+        } // each `expr` is dropped here, after its children were moved out
+    }
+}
+
+/// A type that documents various attributes of an HIR expression.
+///
+/// These attributes are typically defined inductively on the HIR.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct HirInfo {
+    /// Represent yes/no questions by a bitfield to conserve space, since
+    /// this is included in every HIR expression. Each attribute owns one
+    /// bit, assigned by the `define_bool!` invocations on `HirInfo`.
+    ///
+    /// If more bits are needed, it is OK to widen this type as appropriate.
+    bools: u8, // bits 0 through 6 are currently in use
+}
+
+// Generates a private getter/setter pair for the flag stored in bit `$bit`
+// of `self.bools`.
+macro_rules! define_bool {
+    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
+        fn $is_fn_name(&self) -> bool {
+            self.bools & (1 << $bit) != 0
+        }
+
+        fn $set_fn_name(&mut self, yes: bool) {
+            // Clear the flag's bit first, then turn it back on if requested.
+            let mask = 1 << $bit;
+            let cleared = self.bools & !mask;
+            self.bools = if yes { cleared | mask } else { cleared };
+        }
+    }
+}
+
+impl HirInfo {
+    /// Creates a set of attributes in which every flag is cleared.
+    fn new() -> HirInfo {
+        HirInfo { bools: 0 }
+    }
+
+    // One flag per bit; the positions must stay unique and fit in the `u8`.
+    define_bool!(0, is_always_utf8, set_always_utf8);
+    define_bool!(1, is_all_assertions, set_all_assertions);
+    define_bool!(2, is_anchored_start, set_anchored_start);
+    define_bool!(3, is_anchored_end, set_anchored_end);
+    define_bool!(4, is_any_anchored_start, set_any_anchored_start);
+    define_bool!(5, is_any_anchored_end, set_any_anchored_end);
+    define_bool!(6, is_match_empty, set_match_empty);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
+        let mut built = Vec::with_capacity(ranges.len()); // one range per pair
+        for &(lo, hi) in ranges {
+            built.push(ClassUnicodeRange::new(lo, hi));
+        }
+        ClassUnicode::new(built)
+    }
+
+    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
+        let mut built = Vec::with_capacity(ranges.len()); // one range per pair
+        for &(lo, hi) in ranges {
+            built.push(ClassBytesRange::new(lo, hi));
+        }
+        ClassBytes::new(built)
+    }
+
+    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect() // (start, end) per range
+    }
+
+    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone(); // work on a copy; `cls` is left untouched
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.difference(cls2); // removes `cls2` from the copy of `cls1`
+        cls_
+    }
+
+    fn usymdifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone(); // work on a copy; `cls` is left untouched
+        cls_.negate();
+        cls_
+    }
+
+    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect() // (start, end) per range
+    }
+
+    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone(); // work on a copy; `cls` is left untouched
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.difference(cls2); // removes `cls2` from the copy of `cls1`
+        cls_
+    }
+
+    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone(); // work on a copy; `cls1` is left untouched
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn bnegate(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone(); // work on a copy; `cls` is left untouched
+        cls_.negate();
+        cls_
+    }
+
+    #[test]
+    fn class_range_canonical_unicode() {
+        let range = ClassUnicodeRange::new('\u{00FF}', '\0'); // bounds reversed
+        assert_eq!('\0', range.start()); // the smaller bound becomes the start
+        assert_eq!('\u{00FF}', range.end());
+    }
+
+    #[test]
+    fn class_range_canonical_bytes() {
+        let range = ClassBytesRange::new(b'\xFF', b'\0'); // bounds reversed
+        assert_eq!(b'\0', range.start()); // the smaller bound becomes the start
+        assert_eq!(b'\xFF', range.end());
+    }
+
+    #[test]
+    fn class_canonicalize_unicode() {
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls)); // sorted and disjoint: unchanged
+
+        let cls = uclass(&[('x', 'z'), ('a', 'c')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls)); // out-of-order ranges are sorted
+
+        let cls = uclass(&[('x', 'z'), ('w', 'y')]);
+        let expected = vec![('w', 'z')];
+        assert_eq!(expected, uranges(&cls)); // overlapping ranges are merged
+
+        let cls = uclass(&[
+            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
+            ('m', 'p'), ('l', 's'),
+        ]);
+        let expected = vec![('a', 'j'), ('l', 's')];
+        assert_eq!(expected, uranges(&cls)); // many overlaps, two disjoint results
+
+        let cls = uclass(&[('x', 'z'), ('u', 'w')]);
+        let expected = vec![('u', 'z')];
+        assert_eq!(expected, uranges(&cls)); // adjacent (`w` then `x`) ranges merge
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+        let expected = vec![('\x00', '\u{10FFFF}')];
+        assert_eq!(expected, uranges(&cls)); // duplicates collapse into one range
+
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = vec![('a', 'b')];
+        assert_eq!(expected, uranges(&cls)); // adjacent singletons merge
+    }
+
+    #[test]
+    fn class_canonicalize_bytes() {
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls)); // sorted and disjoint: unchanged
+
+        let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls)); // out-of-order ranges are sorted
+
+        let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
+        let expected = vec![(b'w', b'z')];
+        assert_eq!(expected, branges(&cls)); // overlapping ranges are merged
+
+        let cls = bclass(&[
+            (b'c', b'f'), (b'a', b'g'), (b'd', b'j'), (b'a', b'c'),
+            (b'm', b'p'), (b'l', b's'),
+        ]);
+        let expected = vec![(b'a', b'j'), (b'l', b's')];
+        assert_eq!(expected, branges(&cls)); // many overlaps, two disjoint results
+
+        let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
+        let expected = vec![(b'u', b'z')];
+        assert_eq!(expected, branges(&cls)); // adjacent (`w` then `x`) ranges merge
+
+        let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
+        let expected = vec![(b'\x00', b'\xFF')];
+        assert_eq!(expected, branges(&cls)); // duplicates collapse into one range
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = vec![(b'a', b'b')];
+        assert_eq!(expected, branges(&cls)); // adjacent singletons merge
+    }
+
+    #[test]
+    fn class_case_fold_unicode() {
+        let cls = uclass(&[
+            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
+            ('M', 'P'), ('L', 'S'), ('c', 'f'),
+        ]);
+        let expected = uclass(&[
+            ('A', 'J'), ('L', 'S'),
+            ('a', 'j'), ('l', 's'),
+            ('\u{17F}', '\u{17F}'), // LATIN SMALL LETTER LONG S
+        ]);
+        assert_eq!(expected, ucasefold(&cls)); // `K` is absent, so no KELVIN SIGN
+
+        let cls = uclass(&[('A', 'Z')]);
+        let expected = uclass(&[
+            ('A', 'Z'), ('a', 'z'),
+            ('\u{17F}', '\u{17F}'), // LATIN SMALL LETTER LONG S
+            ('\u{212A}', '\u{212A}'), // KELVIN SIGN
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('a', 'z')]);
+        let expected = uclass(&[
+            ('A', 'Z'), ('a', 'z'),
+            ('\u{17F}', '\u{17F}'), // LATIN SMALL LETTER LONG S
+            ('\u{212A}', '\u{212A}'), // KELVIN SIGN
+        ]);
+        assert_eq!(expected, ucasefold(&cls)); // same result as folding `A-Z`
+
+        let cls = uclass(&[('A', 'A'), ('_', '_')]);
+        let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls)); // `_` contributes no case variants
+
+        let cls = uclass(&[('A', 'A'), ('=', '=')]);
+        let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls)); // `=` contributes no case variants
+
+        let cls = uclass(&[('\x00', '\x10')]);
+        assert_eq!(cls, ucasefold(&cls)); // caseless range: folding is a no-op
+
+        let cls = uclass(&[('k', 'k')]);
+        let expected = uclass(&[
+            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls)); // `k` also folds to KELVIN SIGN
+
+        let cls = uclass(&[('@', '@')]);
+        assert_eq!(cls, ucasefold(&cls)); // `@` sits just below `A`; unchanged
+    }
+
+    #[test]
+    fn class_case_fold_bytes() {
+        let cls = bclass(&[
+            (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
+            (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
+        ]);
+        let expected = bclass(&[
+            (b'A', b'J'), (b'L', b'S'),
+            (b'a', b'j'), (b'l', b's'),
+        ]);
+        assert_eq!(expected, bcasefold(&cls)); // ASCII only: no non-ASCII additions
+
+        let cls = bclass(&[(b'A', b'Z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls)); // same result as folding `A-Z`
+
+        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
+        let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls)); // `_` contributes no case variants
+
+        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
+        let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls)); // `=` contributes no case variants
+
+        let cls = bclass(&[(b'\x00', b'\x10')]);
+        assert_eq!(cls, bcasefold(&cls)); // caseless range: folding is a no-op
+
+        let cls = bclass(&[(b'k', b'k')]);
+        let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
+        assert_eq!(expected, bcasefold(&cls)); // unlike Unicode, no KELVIN SIGN
+
+        let cls = bclass(&[(b'@', b'@')]);
+        assert_eq!(cls, bcasefold(&cls)); // `@` sits just below `A`; unchanged
+    }
+
+    #[test]
+    fn class_negate_unicode() {
+        let cls = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // everything except `a`
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // adjacent singletons leave one hole
+
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = uclass(&[
+            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7B', '\u{10FFFF}'),
+        ]);
+        assert_eq!(expected, unegate(&cls)); // two ranges leave three gaps
+
+        let cls = uclass(&[('\x00', 'a')]);
+        let expected = uclass(&[('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // starts at the minimum: no leading gap
+
+        let cls = uclass(&[('a', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\x60')]);
+        assert_eq!(expected, unegate(&cls)); // ends at the maximum: no trailing gap
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, unegate(&cls)); // full class negates to empty
+
+        let cls = uclass(&[]);
+        let expected = uclass(&[('\x00', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // empty class negates to full
+
+        let cls = uclass(&[
+            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}'),
+        ]);
+        let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
+        assert_eq!(expected, unegate(&cls)); // single missing scalar near the top
+
+        let cls = uclass(&[('\x00', '\u{D7FF}')]);
+        let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // result resumes after the surrogates
+
+        let cls = uclass(&[('\x00', '\u{D7FE}')]);
+        let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls)); // result spans the surrogate gap
+
+        let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{D7FF}')]);
+        assert_eq!(expected, unegate(&cls)); // result stops before the surrogates
+
+        let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{E000}')]);
+        assert_eq!(expected, unegate(&cls)); // result spans the surrogate gap
+    }
+
+    #[test]
+    fn class_negate_bytes() {
+        let cls = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls)); // everything except `a`
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls)); // adjacent singletons leave one hole
+
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = bclass(&[
+            (b'\x00', b'\x60'), (b'\x64', b'\x77'), (b'\x7B', b'\xFF'),
+        ]);
+        assert_eq!(expected, bnegate(&cls)); // two ranges leave three gaps
+
+        let cls = bclass(&[(b'\x00', b'a')]);
+        let expected = bclass(&[(b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls)); // starts at the minimum: no leading gap
+
+        let cls = bclass(&[(b'a', b'\xFF')]);
+        let expected = bclass(&[(b'\x00', b'\x60')]);
+        assert_eq!(expected, bnegate(&cls)); // ends at the maximum: no trailing gap
+
+        let cls = bclass(&[(b'\x00', b'\xFF')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bnegate(&cls)); // full class negates to empty
+
+        let cls = bclass(&[]);
+        let expected = bclass(&[(b'\x00', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls)); // empty class negates to full
+
+        let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
+        let expected = bclass(&[(b'\xFE', b'\xFE')]);
+        assert_eq!(expected, bnegate(&cls)); // single missing byte near the top
+    }
+
+    #[test]
+    fn class_union_unicode() {
+        let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
+        let cls2 = uclass(&[('a', 'z')]); // covers both lowercase ranges of `cls1`
+        let expected = uclass(&[('a', 'z'), ('A', 'C')]);
+        assert_eq!(expected, uunion(&cls1, &cls2)); // `A-C` survives untouched
+    }
+
+    #[test]
+    fn class_union_bytes() {
+        let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
+        let cls2 = bclass(&[(b'a', b'z')]); // covers both lowercase ranges of `cls1`
+        let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
+        assert_eq!(expected, bunion(&cls1, &cls2)); // `A-C` survives untouched
+    }
+
+    #[test]
+    fn class_intersect_unicode() {
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // empty left operand
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // identical singletons
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('b', 'b')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // disjoint singletons
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // singleton within a range
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // shared start, shorter wins
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('b', 'c')]);
+        let expected = uclass(&[('b', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // overlap of a single char
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('c', 'd')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // adjacent but disjoint
+
+        let cls1 = uclass(&[('b', 'c')]);
+        let cls2 = uclass(&[('a', 'd')]);
+        let expected = uclass(&[('b', 'c')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // range strictly inside
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // all ranges inside one
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // identical classes
+
+        let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
+        let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // interleaved, no overlap
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('h', 'h')]);
+        let expected = uclass(&[('h', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // only the last range is hit
+
+        let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
+        let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // interleaved, no overlap
+
+        let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
+        let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
+        let expected = uclass(&[('b', 'f')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2)); // pieces `b`..`f` merge
+    }
+
+    #[test]
+    fn class_intersect_bytes() {
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // empty left operand
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // identical singletons
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'b', b'b')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // disjoint singletons
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // singleton within a range
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // shared start, shorter wins
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'b', b'c')]);
+        let expected = bclass(&[(b'b', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // overlap of a single byte
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'c', b'd')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // adjacent but disjoint
+
+        let cls1 = bclass(&[(b'b', b'c')]);
+        let cls2 = bclass(&[(b'a', b'd')]);
+        let expected = bclass(&[(b'b', b'c')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // range strictly inside
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // all ranges inside one
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // identical classes
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // interleaved, no overlap
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'h', b'h')]);
+        let expected = bclass(&[(b'h', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // only the last range is hit
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
+        let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // interleaved, no overlap
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
+        let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
+        let expected = bclass(&[(b'b', b'f')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2)); // pieces `b`..`f` merge
+    }
+
+    #[test]
+    fn class_difference_unicode() {
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // x - x is empty
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // subtracting nothing
+
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // subtracting from nothing
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('b', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // trims the first char
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('z', 'z')]);
+        let expected = uclass(&[('a', 'y')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // trims the last char
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('m', 'm')]);
+        let expected = uclass(&[('a', 'l'), ('n', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // a middle hole splits it
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('a', 'z')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // fully covered
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('d', 'v')]);
+        let expected = uclass(&[('a', 'c')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // last two ranges removed
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // every range is trimmed
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // same cut, split subtrahend
+
+        let cls1 = uclass(&[('x', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('x', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // no overlap: unchanged
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2)); // three holes in one range
+    }
+
+    #[test]
+    fn class_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // x - x is empty
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // subtracting nothing
+
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // subtracting from nothing
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'b', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // trims the first byte
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'z', b'z')]);
+        let expected = bclass(&[(b'a', b'y')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // trims the last byte
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'm', b'm')]);
+        let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // a middle hole splits it
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // fully covered
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'd', b'v')]);
+        let expected = bclass(&[(b'a', b'c')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // last two ranges removed
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // every range is trimmed
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // same cut, split subtrahend
+
+        let cls1 = bclass(&[(b'x', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'x', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // no overlap: unchanged
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2)); // three holes in one range
+    }
+
+    #[test]
+    fn class_symmetric_difference_unicode() {
+        let cls1 = uclass(&[('a', 'm')]);
+        let cls2 = uclass(&[('g', 't')]); // overlaps `cls1` on `g-m`
+        let expected = uclass(&[('a', 'f'), ('n', 't')]);
+        assert_eq!(expected, usymdifference(&cls1, &cls2)); // the overlap is removed
+    }
+
+    #[test]
+    fn class_symmetric_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'm')]);
+        let cls2 = bclass(&[(b'g', b't')]); // overlaps `cls1` on `g-m`
+        let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
+        assert_eq!(expected, bsymdifference(&cls1, &cls2)); // the overlap is removed
+    }
+
+    #[test]
+    #[should_panic] // constructing the literal below is expected to panic
+    fn hir_byte_literal_non_ascii() {
+        Hir::literal(Literal::Byte(b'a')); // ASCII byte; presumably `Byte` must be non-ASCII (confirm)
+    }
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Hir can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let mut expr = Hir::empty();
+            for _ in 0..100 { // each pass wraps `expr` in four more levels
+                expr = Hir::group(Group {
+                    kind: GroupKind::NonCapturing,
+                    hir: Box::new(expr),
+                });
+                expr = Hir::repetition(Repetition {
+                    kind: RepetitionKind::ZeroOrOne,
+                    greedy: true,
+                    hir: Box::new(expr),
+                });
+                // `Concat`/`Alternation` nodes are built directly as struct literals.
+                expr = Hir {
+                    kind: HirKind::Concat(vec![expr]),
+                    info: HirInfo::new(),
+                };
+                expr = Hir {
+                    kind: HirKind::Alternation(vec![expr]),
+                    info: HirInfo::new(),
+                };
+            }
+            assert!(!expr.kind.is_empty()); // `expr` is dropped when `run` returns
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1<<10) // 1024 bytes
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap(); // a panic on the spawned thread fails the test here
+    }
+}
diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs
new file mode 100644
index 0000000000..c56d9745a2
--- /dev/null
+++ b/regex-syntax/src/hir/translate.rs
@@ -0,0 +1,2506 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use std::cell::{Cell, RefCell};
+use std::result;
+
+use ast::{self, Ast, Span, Visitor};
+use hir::{self, Error, ErrorKind, Hir};
+use unicode::{self, ClassQuery};
+
+type Result<T> = result::Result<T, Error>;
+
+/// A builder for constructing an AST->HIR translator.
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+    allow_invalid_utf8: bool,
+    flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    fn default() -> TranslatorBuilder {
+        TranslatorBuilder::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default configuration.
+    pub fn new() -> TranslatorBuilder {
+        TranslatorBuilder {
+            allow_invalid_utf8: false,
+            flags: Flags::default(),
+        }
+    }
+
+    /// Build a translator using the current configuration.
+    pub fn build(&self) -> Translator {
+        Translator {
+            stack: RefCell::new(vec![]),
+            flags: Cell::new(self.flags),
+            allow_invalid_utf8: self.allow_invalid_utf8,
+        }
+    }
+
+    /// When enabled, translation will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the translator is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// translator will return an error).
+    pub fn allow_invalid_utf8(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.allow_invalid_utf8 = yes;
+        self
+    }
+
+    /// Enable or disable the case insensitive flag (`i`) by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.case_insensitive = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag (`m`) by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.multi_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag (`s`) by
+    /// default.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag (`U`) by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.swap_greed = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.unicode = if yes { None } else { Some(false) };
+        self
+    }
+}
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Translator {
+    /// Our call stack, but on the heap.
+    stack: RefCell<Vec<HirFrame>>,
+    /// The current flag settings.
+    flags: Cell<Flags>,
+    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
+    allow_invalid_utf8: bool,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Translator {
+        TranslatorBuilder::new().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// If there was a problem doing the translation, then an HIR-specific
+    /// error is returned.
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. The translator does not use the pattern string during any
+    /// correct translation, but it is used for error reporting.
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        ast::visit(ast, TranslatorI::new(self, pattern))
+    }
+}
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor represents any state associated with
+/// traversing the Ast itself.
+#[derive(Clone, Debug)]
+enum HirFrame {
+    /// An arbitrary HIR expression. These get pushed whenever we hit a base
+    /// case in the Ast. They get popped after an inductive (i.e., recursive)
+    /// step is complete.
+    Expr(Hir),
+    /// A Unicode character class. This frame is mutated as we descend into
+    /// the Ast of a character class (which is itself its own mini recursive
+    /// structure).
+    ClassUnicode(hir::ClassUnicode),
+    /// A byte-oriented character class. This frame is mutated as we descend
+    /// into the Ast of a character class (which is itself its own mini
+    /// recursive structure).
+    ///
+    /// Byte character classes are created when Unicode mode (`u`) is disabled.
+    /// If `allow_invalid_utf8` is disabled (the default), then a byte
+    /// character class is only permitted to match ASCII text.
+    ClassBytes(hir::ClassBytes),
+    /// This is pushed on to the stack upon first seeing any kind of group,
+    /// indicated by parentheses (including non-capturing groups). It is popped
+    /// upon leaving a group.
+    Group {
+        /// The old active flags, if any, when this group was opened.
+        ///
+        /// If this group sets flags, then the new active flags are set to the
+        /// result of merging the old flags with the flags introduced by this
+        /// group.
+        ///
+        /// When this group is popped, the active flags should be restored to
+        /// the flags set here.
+        ///
+        /// The "active" flags correspond to whatever flags are set in the
+        /// Translator.
+        old_flags: Option<Flags>,
+    },
+    /// This is pushed whenever a concatenation is observed. After visiting
+    /// every sub-expression in the concatenation, the translator's stack is
+    /// popped until it sees a Concat frame.
+    Concat,
+    /// This is pushed whenever an alternation is observed. After visiting
+    /// every sub-expression in the alternation, the translator's stack is
+    /// popped until it sees an Alternation frame.
+    Alternation,
+}
+
+impl HirFrame {
+    /// Assert that the current stack frame is an Hir expression and return it.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Expr(expr) => expr,
+            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a Unicode class expression and
+    /// return it.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        match self {
+            HirFrame::ClassUnicode(cls) => cls,
+            _ => panic!("tried to unwrap Unicode class \
+                         from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a byte class expression and
+    /// return it.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        match self {
+            HirFrame::ClassBytes(cls) => cls,
+            _ => panic!("tried to unwrap byte class \
+                         from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a group indicator and return
+    /// its corresponding flags (the flags that were active at the time the
+    /// group was entered) if they exist.
+    fn unwrap_group(self) -> Option<Flags> {
+        match self {
+            HirFrame::Group { old_flags } => old_flags,
+            _ => panic!("tried to unwrap group from HirFrame, got: {:?}", self)
+        }
+    }
+}
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+    type Output = Hir;
+    type Err = Error;
+
+    fn finish(self) -> Result<Hir> {
+        if self.trans().stack.borrow().is_empty() {
+            // This can happen if the Ast given consists of a single set of
+            // flags. e.g., `(?i)`. /shrug
+            return Ok(Hir::empty());
+        }
+        // ... otherwise, we should have exactly one HIR on the stack.
+        assert_eq!(self.trans().stack.borrow().len(), 1);
+        Ok(self.pop().unwrap().unwrap_expr())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(_)) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            Ast::Group(ref x) => {
+                let old_flags = x.flags().map(|ast| self.set_flags(ast));
+                self.push(HirFrame::Group {
+                    old_flags: old_flags,
+                });
+            }
+            Ast::Concat(ref x) if x.asts.is_empty() => {}
+            Ast::Concat(_) => {
+                self.push(HirFrame::Concat);
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => {}
+            Ast::Alternation(_) => {
+                self.push(HirFrame::Alternation);
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_) => {
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Flags(ref x) => {
+                self.set_flags(&x.flags);
+            }
+            Ast::Literal(ref x) => {
+                self.push(HirFrame::Expr(try!(self.hir_literal(x))));
+            }
+            Ast::Dot(span) => {
+                self.push(HirFrame::Expr(try!(self.hir_dot(span))));
+            }
+            Ast::Assertion(ref x) => {
+                self.push(HirFrame::Expr(self.hir_assertion(x)));
+            }
+            Ast::Class(ast::Class::Perl(ref x)) => {
+                if self.flags().unicode() {
+                    let cls = self.hir_perl_unicode_class(x);
+                    let hcls = hir::Class::Unicode(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                } else {
+                    let cls = self.hir_perl_byte_class(x);
+                    let hcls = hir::Class::Bytes(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                }
+            }
+            Ast::Class(ast::Class::Unicode(ref x)) => {
+                let cls = hir::Class::Unicode(try!(self.hir_unicode_class(x)));
+                self.push(HirFrame::Expr(Hir::class(cls)));
+            }
+            Ast::Class(ast::Class::Bracketed(ref ast)) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(ast.negated, &mut cls);
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span, ErrorKind::EmptyClassNotAllowed));
+                    }
+                    let expr = Hir::class(hir::Class::Unicode(cls));
+                    self.push(HirFrame::Expr(expr));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    try!(self.bytes_fold_and_negate(
+                        &ast.span, ast.negated, &mut cls));
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span, ErrorKind::EmptyClassNotAllowed));
+                    }
+
+                    let expr = Hir::class(hir::Class::Bytes(cls));
+                    self.push(HirFrame::Expr(expr));
+                }
+            }
+            Ast::Repetition(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+            }
+            Ast::Group(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                if let Some(flags) = self.pop().unwrap().unwrap_group() {
+                    self.trans().flags.set(flags);
+                }
+                self.push(HirFrame::Expr(self.hir_group(x, expr)));
+            }
+            Ast::Concat(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::concat(exprs)));
+            }
+            Ast::Alternation(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::alternation(exprs)));
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(_) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            // We needn't handle the Union case here since the visitor will
+            // do it for us.
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_) => {}
+            ast::ClassSetItem::Literal(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let byte = try!(self.class_literal_byte(x));
+                    cls.push(hir::ClassBytesRange::new(byte, byte));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Range(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let start = try!(self.class_literal_byte(&x.start));
+                    let end = try!(self.class_literal_byte(&x.end));
+                    cls.push(hir::ClassBytesRange::new(start, end));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Ascii(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.unicode_fold_and_negate(x.negated, &mut cls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
+                    }
+                    try!(self.bytes_fold_and_negate(
+                        &x.span, x.negated, &mut cls));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Unicode(ref x) => {
+                let xcls = try!(self.hir_unicode_class(x));
+                let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                cls.union(&xcls);
+                self.push(HirFrame::ClassUnicode(cls));
+            }
+            ast::ClassSetItem::Perl(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_perl_unicode_class(x);
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_perl_byte_class(x);
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Bracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(ast.negated, &mut cls1);
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassUnicode(cls2));
+                } else {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+                    try!(self.bytes_fold_and_negate(
+                        &ast.span, ast.negated, &mut cls1));
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassBytes(cls2));
+                }
+            }
+            // This is handled automatically by the visitor.
+            ast::ClassSetItem::Union(_) => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        use ast::ClassSetBinaryOpKind::*;
+
+        if self.flags().unicode() {
+            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut cls = self.pop().unwrap().unwrap_class_unicode();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut cls = self.pop().unwrap().unwrap_class_bytes();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type is responsible for carrying around the original pattern string,
+/// which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+    trans: &'t Translator,
+    pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Build a new internal translator.
+    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
+        TranslatorI { trans: trans, pattern: pattern }
+    }
+
+    /// Return a reference to the underlying translator.
+    fn trans(&self) -> &Translator {
+        &self.trans
+    }
+
+    /// Push the given frame on to the call stack.
+    fn push(&self, frame: HirFrame) {
+        self.trans().stack.borrow_mut().push(frame);
+    }
+
+    /// Pop the top of the call stack. If the call stack is empty, return None.
+    fn pop(&self) -> Option<HirFrame> {
+        self.trans().stack.borrow_mut().pop()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ErrorKind) -> Error {
+        Error { kind: kind, pattern: self.pattern.to_string(), span: span }
+    }
+
+    /// Return a copy of the active flags.
+    fn flags(&self) -> Flags {
+        self.trans().flags.get()
+    }
+
+    /// Set the flags of this translator from the flags set in the given AST.
+    /// Then, return the old flags.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let old_flags = self.flags();
+        let mut new_flags = Flags::from_ast(ast_flags);
+        new_flags.merge(&old_flags);
+        self.trans().flags.set(new_flags);
+        old_flags
+    }
+
+    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
+        let ch = match try!(self.literal_to_char(lit)) {
+            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
+            hir::Literal::Unicode(ch) => ch,
+        };
+        if self.flags().case_insensitive() {
+            self.hir_from_char_case_insensitive(lit.span, ch)
+        } else {
+            self.hir_from_char(lit.span, ch)
+        }
+    }
+
+    /// Convert an Ast literal to its scalar representation.
+    ///
+    /// When Unicode mode is enabled, then this always succeeds and returns a
+    /// `char` (Unicode scalar value).
+    ///
+    /// When Unicode mode is disabled, then a raw byte is returned. If that
+    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
+    /// an error.
+    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
+        if self.flags().unicode() {
+            return Ok(hir::Literal::Unicode(lit.c));
+        }
+        let byte = match lit.byte() {
+            None => return Ok(hir::Literal::Unicode(lit.c)),
+            Some(byte) => byte,
+        };
+        if byte <= 0x7F {
+            return Ok(hir::Literal::Unicode(byte as char));
+        }
+        if !self.trans().allow_invalid_utf8 {
+            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
+        }
+        Ok(hir::Literal::Byte(byte))
+    }
+
+    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
+        if !self.flags().unicode() && c.len_utf8() > 1 {
+            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+        }
+        Ok(Hir::literal(hir::Literal::Unicode(c)))
+    }
+
+    fn hir_from_char_case_insensitive(
+        &self,
+        span: Span,
+        c: char,
+    ) -> Result<Hir> {
+        // If case folding won't do anything, then don't bother trying.
+        if !unicode::contains_simple_case_mapping(c, c) {
+            return self.hir_from_char(span, c);
+        }
+        if self.flags().unicode() {
+            let mut cls = hir::ClassUnicode::new(vec![
+                hir::ClassUnicodeRange::new(c, c),
+            ]);
+            cls.case_fold_simple();
+            Ok(Hir::class(hir::Class::Unicode(cls)))
+        } else {
+            if c.len_utf8() > 1 {
+                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+            }
+            let mut cls = hir::ClassBytes::new(vec![
+                hir::ClassBytesRange::new(c as u8, c as u8),
+            ]);
+            cls.case_fold_simple();
+            Ok(Hir::class(hir::Class::Bytes(cls)))
+        }
+    }
+
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        if !unicode && !self.trans().allow_invalid_utf8 {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        Ok(if self.flags().dot_matches_new_line() {
+            Hir::any(!unicode)
+        } else {
+            Hir::dot(!unicode)
+        })
+    }
+
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Hir {
+        let unicode = self.flags().unicode();
+        let multi_line = self.flags().multi_line();
+        match asst.kind {
+            ast::AssertionKind::StartLine => {
+                Hir::anchor(if multi_line {
+                    hir::Anchor::StartLine
+                } else {
+                    hir::Anchor::StartText
+                })
+            }
+            ast::AssertionKind::EndLine => {
+                Hir::anchor(if multi_line {
+                    hir::Anchor::EndLine
+                } else {
+                    hir::Anchor::EndText
+                })
+            }
+            ast::AssertionKind::StartText => {
+                Hir::anchor(hir::Anchor::StartText)
+            }
+            ast::AssertionKind::EndText => {
+                Hir::anchor(hir::Anchor::EndText)
+            }
+            ast::AssertionKind::WordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::Unicode
+                } else {
+                    hir::WordBoundary::Ascii
+                })
+            }
+            ast::AssertionKind::NotWordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::UnicodeNegate
+                } else {
+                    hir::WordBoundary::AsciiNegate
+                })
+            }
+        }
+    }
+
+    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
+        let kind = match group.kind {
+            ast::GroupKind::CaptureIndex(idx) => {
+                hir::GroupKind::CaptureIndex(idx)
+            }
+            ast::GroupKind::CaptureName(ref capname) => {
+                hir::GroupKind::CaptureName {
+                    name: capname.name.clone(),
+                    index: capname.index,
+                }
+            }
+            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
+        };
+        Hir::group(hir::Group {
+            kind: kind,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        let kind = match rep.op.kind {
+            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
+            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
+            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
+            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(m,n)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
+            }
+        };
+        let greedy =
+            if self.flags().swap_greed() {
+                !rep.greedy
+            } else {
+                rep.greedy
+            };
+        Hir::repetition(hir::Repetition {
+            kind: kind,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        use ast::ClassUnicodeKind::*;
+
+        if !self.flags().unicode() {
+            return Err(self.error(
+                ast_class.span,
+                ErrorKind::UnicodeNotAllowed,
+            ));
+        }
+        let query = match ast_class.kind {
+            OneLetter(name) => ClassQuery::OneLetter(name),
+            Named(ref name) => ClassQuery::Binary(name),
+            NamedValue { ref name, ref value, .. } => {
+                ClassQuery::ByValue {
+                    property_name: name,
+                    property_value: value,
+                }
+            }
+        };
+        match unicode::class(query) {
+            Ok(mut class) => {
+                self.unicode_fold_and_negate(ast_class.negated, &mut class);
+                Ok(class)
+            }
+            Err(unicode::Error::PropertyNotFound) => {
+                Err(self.error(
+                    ast_class.span,
+                    ErrorKind::UnicodePropertyNotFound,
+                ))
+            }
+            Err(unicode::Error::PropertyValueNotFound) => {
+                Err(self.error(
+                    ast_class.span,
+                    ErrorKind::UnicodePropertyValueNotFound,
+                ))
+            }
+        }
+    }
+
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassUnicode {
+        use ast::ClassPerlKind::*;
+        use unicode_tables::perl_word::PERL_WORD;
+
+        assert!(self.flags().unicode());
+        let mut class = match ast_class.kind {
+            Digit => {
+                let query = ClassQuery::Binary("Decimal_Number");
+                unicode::class(query).unwrap()
+            }
+            Space => {
+                let query = ClassQuery::Binary("Whitespace");
+                unicode::class(query).unwrap()
+            }
+            Word => unicode::hir_class(PERL_WORD),
+        };
+        // We needn't apply case folding here because the Perl Unicode classes
+        // are already closed under Unicode simple case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    /// Translate a Perl character class (digit, space or word, possibly
+    /// negated) into a byte oriented class built from the matching ASCII
+    /// class table.
+    ///
+    /// Panics if the Unicode flag is enabled.
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassBytes {
+        use ast::ClassPerlKind::*;
+
+        assert!(!self.flags().unicode());
+        let ascii_kind = match ast_class.kind {
+            Digit => ast::ClassAsciiKind::Digit,
+            Space => ast::ClassAsciiKind::Space,
+            Word => ast::ClassAsciiKind::Word,
+        };
+        let mut class = hir_ascii_class_bytes(&ascii_kind);
+        // No case folding is required: the Perl ASCII classes are already
+        // closed under ASCII case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    /// Apply simple case folding (when the case insensitive flag is set)
+    /// and then negation (when `negated` is true) to the given Unicode class.
+    fn unicode_fold_and_negate(
+        &self,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) {
+        // Order matters: folding has to come before negation. Take
+        // `(?i)[^x]`: negating first and folding afterwards would yield a
+        // class that matches every Unicode scalar value.
+        let fold = self.flags().case_insensitive();
+        if fold {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+    }
+
+    /// Apply simple case folding (when the case insensitive flag is set)
+    /// and then negation (when `negated` is true) to the given byte class.
+    ///
+    /// Returns an `InvalidUtf8` error carrying `span` if, after both steps,
+    /// the class is not all ASCII while the translator disallows invalid
+    /// UTF-8.
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Order matters: folding has to come before negation. Take
+        // `(?i-u)[^x]`: negating first and folding afterwards would yield a
+        // class that matches every byte.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        if self.trans().allow_invalid_utf8 || class.is_all_ascii() {
+            Ok(())
+        } else {
+            Err(self.error(span.clone(), ErrorKind::InvalidUtf8))
+        }
+    }
+
+    /// Return a scalar byte value suitable for use as a literal in a byte
+    /// character class.
+    ///
+    /// Byte literals are passed through, code points up to 0x7F are narrowed
+    /// to a single byte and any other code point yields a
+    /// `UnicodeNotAllowed` error.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        let lit = try!(self.literal_to_char(ast));
+        match lit {
+            hir::Literal::Byte(byte) => Ok(byte),
+            hir::Literal::Unicode(ch) if ch <= 0x7F as char => Ok(ch as u8),
+            hir::Literal::Unicode(_) => {
+                // We can't feasibly support Unicode in byte oriented
+                // classes. Byte classes don't do Unicode case folding.
+                Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled or
+/// present but enabled.
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+    // For every field: `None` means the flag is absent, `Some(false)` means
+    // present but disabled and `Some(true)` means present but enabled. The
+    // derived `Default` leaves every flag absent.
+    case_insensitive: Option<bool>,
+    multi_line: Option<bool>,
+    dot_matches_new_line: Option<bool>,
+    swap_greed: Option<bool>,
+    unicode: Option<bool>,
+    // Note that `ignore_whitespace` is omitted here because it is handled
+    // entirely in the parser.
+}
+
+impl Flags {
+    /// Build a set of flags from the items of an AST flag group.
+    ///
+    /// Flags listed before a negation item are recorded as enabled and flags
+    /// listed after it as disabled. Flags that are not listed stay absent.
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        let mut enable = true;
+        for item in &ast.items {
+            match item.kind {
+                // Everything that follows the negation is being turned off.
+                ast::FlagsItemKind::Negation => enable = false,
+                ast::FlagsItemKind::Flag(ref flag) => match *flag {
+                    ast::Flag::CaseInsensitive => {
+                        flags.case_insensitive = Some(enable);
+                    }
+                    ast::Flag::MultiLine => {
+                        flags.multi_line = Some(enable);
+                    }
+                    ast::Flag::DotMatchesNewLine => {
+                        flags.dot_matches_new_line = Some(enable);
+                    }
+                    ast::Flag::SwapGreed => {
+                        flags.swap_greed = Some(enable);
+                    }
+                    ast::Flag::Unicode => {
+                        flags.unicode = Some(enable);
+                    }
+                    // Not tracked here.
+                    ast::Flag::IgnoreWhitespace => {}
+                },
+            }
+        }
+        flags
+    }
+
+    /// Fill in every flag that is absent in `self` with the state it has in
+    /// `previous`. Flags already present in `self` are left untouched.
+    fn merge(&mut self, previous: &Flags) {
+        self.case_insensitive =
+            self.case_insensitive.or(previous.case_insensitive);
+        self.multi_line = self.multi_line.or(previous.multi_line);
+        self.dot_matches_new_line =
+            self.dot_matches_new_line.or(previous.dot_matches_new_line);
+        self.swap_greed = self.swap_greed.or(previous.swap_greed);
+        self.unicode = self.unicode.or(previous.unicode);
+    }
+
+    /// Whether case insensitive matching is on. Absent counts as off.
+    fn case_insensitive(&self) -> bool {
+        self.case_insensitive == Some(true)
+    }
+
+    /// Whether multi-line mode is on. Absent counts as off.
+    fn multi_line(&self) -> bool {
+        self.multi_line == Some(true)
+    }
+
+    /// Whether "dot matches new line" is on. Absent counts as off.
+    fn dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line == Some(true)
+    }
+
+    /// Whether greediness is swapped. Absent counts as off.
+    fn swap_greed(&self) -> bool {
+        self.swap_greed == Some(true)
+    }
+
+    /// Whether Unicode mode is on. Unlike the other flags, absent counts as
+    /// on.
+    fn unicode(&self) -> bool {
+        self.unicode != Some(false)
+    }
+}
+
+/// Build a byte oriented class from the range table of the given ASCII
+/// class, narrowing each endpoint to a `u8`.
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    let table = ascii_class(kind);
+    let mut ranges = Vec::with_capacity(table.len());
+    for &(start, end) in table {
+        ranges.push(hir::ClassBytesRange::new(start as u8, end as u8));
+    }
+    hir::ClassBytes::new(ranges)
+}
+
+/// Return the table of ranges making up the given ASCII character class.
+///
+/// Each entry is an inclusive `(start, end)` pair of ASCII characters with
+/// `start <= end`.
+fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
+    use ast::ClassAsciiKind::*;
+
+    // TODO: Get rid of these consts, which appear necessary for older
+    // versions of Rust.
+    type T = &'static [(char, char)];
+    match *kind {
+        Alnum => {
+            const X: T = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
+            X
+        }
+        Alpha => {
+            const X: T = &[('A', 'Z'), ('a', 'z')];
+            X
+        }
+        Ascii => {
+            const X: T = &[('\x00', '\x7F')];
+            X
+        }
+        Blank => {
+            // Exactly tab and space, as two singleton ranges. A single
+            // `(' ', '\t')` pair is inverted (0x20 > 0x09); a range
+            // constructor that orders its endpoints would turn it into
+            // tab..=space, which also covers `\n`, `\r` and other control
+            // characters.
+            const X: T = &[('\t', '\t'), (' ', ' ')];
+            X
+        }
+        Cntrl => {
+            const X: T = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
+            X
+        }
+        Digit => {
+            const X: T = &[('0', '9')];
+            X
+        }
+        Graph => {
+            const X: T = &[('!', '~')];
+            X
+        }
+        Lower => {
+            const X: T = &[('a', 'z')];
+            X
+        }
+        Print => {
+            const X: T = &[(' ', '~')];
+            X
+        }
+        Punct => {
+            const X: T = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
+            X
+        }
+        Space => {
+            const X: T = &[
+                ('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), ('\x0C', '\x0C'),
+                ('\r', '\r'), (' ', ' '),
+            ];
+            X
+        }
+        Upper => {
+            const X: T = &[('A', 'Z')];
+            X
+        }
+        Word => {
+            const X: T = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
+            X
+        }
+        Xdigit => {
+            const X: T = &[('0', '9'), ('A', 'F'), ('a', 'f')];
+            X
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::{self, Ast, Position, Span};
+    use ast::parse::ParserBuilder;
+    use hir::{self, Hir, HirKind};
+    use unicode::{self, ClassQuery};
+
+    use super::{TranslatorBuilder, ascii_class};
+
+    // We create these errors to compare with real hir::Errors in the tests.
+    // We define equality between TestError and hir::Error to disregard the
+    // pattern string in hir::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        // The span the error is expected to point at.
+        span: Span,
+        // The kind of error that is expected.
+        kind: hir::ErrorKind,
+    }
+
+    impl PartialEq<hir::Error> for TestError {
+        fn eq(&self, other: &hir::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    // The mirror image of the impl on TestError, so that either value may
+    // appear on the left-hand side of a comparison.
+    impl PartialEq<TestError> for hir::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            if self.span != other.span {
+                return false;
+            }
+            self.kind == other.kind
+        }
+    }
+
+    // Parse `pattern` into an Ast with octal syntax enabled, panicking if
+    // the pattern does not parse.
+    fn parse(pattern: &str) -> Ast {
+        let result = ParserBuilder::new().octal(true).build().parse(pattern);
+        result.unwrap()
+    }
+
+    // Translate `pattern` with invalid UTF-8 disallowed, panicking if
+    // parsing or translation fails.
+    fn t(pattern: &str) -> Hir {
+        let ast = parse(pattern);
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &ast)
+            .unwrap()
+    }
+
+    // Translate `pattern` with invalid UTF-8 disallowed and return the
+    // translation error, panicking if parsing fails or translation succeeds.
+    fn t_err(pattern: &str) -> hir::Error {
+        let ast = parse(pattern);
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &ast)
+            .unwrap_err()
+    }
+
+    // Translate `pattern` with invalid UTF-8 allowed, panicking if parsing
+    // or translation fails.
+    fn t_bytes(pattern: &str) -> Hir {
+        let ast = parse(pattern);
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(true)
+            .build()
+            .translate(pattern, &ast)
+            .unwrap()
+    }
+
+    // Build the expected Hir for a literal string: the empty expression for
+    // "", otherwise a concatenation of one Unicode literal per character.
+    fn hir_lit(s: &str) -> Hir {
+        if s.is_empty() {
+            return Hir::empty();
+        }
+        let mut lits = vec![];
+        for ch in s.chars() {
+            lits.push(Hir::literal(hir::Literal::Unicode(ch)));
+        }
+        Hir::concat(lits)
+    }
+
+    // Build the expected Hir for a byte string: the empty expression for no
+    // bytes, a lone byte literal for one byte, otherwise a concatenation of
+    // one byte literal per byte.
+    fn hir_blit(s: &[u8]) -> Hir {
+        if s.is_empty() {
+            return Hir::empty();
+        }
+        if s.len() == 1 {
+            return Hir::literal(hir::Literal::Byte(s[0]));
+        }
+        let mut lits = vec![];
+        for &byte in s {
+            lits.push(Hir::literal(hir::Literal::Byte(byte)));
+        }
+        Hir::concat(lits)
+    }
+
+    // Wrap `expr` in an unnamed capturing group with capture index `i`.
+    fn hir_group(i: u32, expr: Hir) -> Hir {
+        let group = hir::Group {
+            kind: hir::GroupKind::CaptureIndex(i),
+            hir: Box::new(expr),
+        };
+        Hir::group(group)
+    }
+
+    // Wrap `expr` in a named capturing group with the given name and
+    // capture index `i`.
+    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
+        let kind = hir::GroupKind::CaptureName {
+            name: name.to_owned(),
+            index: i,
+        };
+        let group = hir::Group { kind: kind, hir: Box::new(expr) };
+        Hir::group(group)
+    }
+
+    // Wrap `expr` in a non-capturing group.
+    fn hir_group_nocap(expr: Hir) -> Hir {
+        let group = hir::Group {
+            kind: hir::GroupKind::NonCapturing,
+            hir: Box::new(expr),
+        };
+        Hir::group(group)
+    }
+
+    // Build a zero-or-one repetition of `expr` with the given greediness.
+    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+        let rep = hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrOne,
+            greedy: greedy,
+            hir: Box::new(expr),
+        };
+        Hir::repetition(rep)
+    }
+
+    // Build a zero-or-more repetition of `expr` with the given greediness.
+    fn hir_star(greedy: bool, expr: Hir) -> Hir {
+        let rep = hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        };
+        Hir::repetition(rep)
+    }
+
+    // Build a one-or-more repetition of `expr` with the given greediness.
+    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+        let rep = hir::Repetition {
+            kind: hir::RepetitionKind::OneOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        };
+        Hir::repetition(rep)
+    }
+
+    // Build a counted repetition of `expr` over `range` with the given
+    // greediness.
+    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
+        let rep = hir::Repetition {
+            kind: hir::RepetitionKind::Range(range),
+            greedy: greedy,
+            hir: Box::new(expr),
+        };
+        Hir::repetition(rep)
+    }
+
+    // Build an alternation of the given expressions. Short alias for
+    // `Hir::alternation`, used to keep the expected values compact.
+    fn hir_alt(alts: Vec<Hir>) -> Hir {
+        Hir::alternation(alts)
+    }
+
+    // Build a concatenation of the given expressions. Short alias for
+    // `Hir::concat`, used to keep the expected values compact.
+    fn hir_cat(exprs: Vec<Hir>) -> Hir {
+        Hir::concat(exprs)
+    }
+
+    // Build a Unicode class expression from a Unicode property query,
+    // panicking if the query does not resolve.
+    fn hir_uclass_query(query: ClassQuery) -> Hir {
+        let class = unicode::class(query).unwrap();
+        Hir::class(hir::Class::Unicode(class))
+    }
+
+    // Build a Unicode class expression from the `PERL_WORD` table.
+    fn hir_uclass_perl_word() -> Hir {
+        use unicode_tables::perl_word::PERL_WORD;
+
+        let class = unicode::hir_class(PERL_WORD);
+        Hir::class(hir::Class::Unicode(class))
+    }
+
+    // Build a Unicode class expression from `(start, end)` character pairs.
+    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+        let mut class_ranges = Vec::with_capacity(ranges.len());
+        for &(start, end) in ranges {
+            class_ranges.push(hir::ClassUnicodeRange::new(start, end));
+        }
+        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(class_ranges)))
+    }
+
+    // Build a byte class expression from `(start, end)` byte pairs.
+    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+        let mut class_ranges = Vec::with_capacity(ranges.len());
+        for &(start, end) in ranges {
+            class_ranges.push(hir::ClassBytesRange::new(start, end));
+        }
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(class_ranges)))
+    }
+
+    // Build a byte class expression from `(start, end)` character pairs.
+    // Panics if any endpoint is above 0x7F.
+    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
+        let mut class_ranges = Vec::with_capacity(ranges.len());
+        for &(s, e) in ranges {
+            assert!(s as u32 <= 0x7F);
+            assert!(e as u32 <= 0x7F);
+            class_ranges.push(hir::ClassBytesRange::new(s as u8, e as u8));
+        }
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(class_ranges)))
+    }
+
+    // Apply simple case folding to a class expression. Panics if `expr` is
+    // not a class.
+    fn hir_case_fold(expr: Hir) -> Hir {
+        let mut cls = match expr.into_kind() {
+            HirKind::Class(cls) => cls,
+            _ => panic!("cannot case fold non-class Hir expr"),
+        };
+        cls.case_fold_simple();
+        Hir::class(cls)
+    }
+
+    // Negate a class expression. Panics if `expr` is not a class.
+    fn hir_negate(expr: Hir) -> Hir {
+        let mut cls = match expr.into_kind() {
+            HirKind::Class(cls) => cls,
+            _ => panic!("cannot negate non-class Hir expr"),
+        };
+        cls.negate();
+        Hir::class(cls)
+    }
+
+    // Union two class expressions of the same flavor (both Unicode or both
+    // bytes). Panics on anything else, including mixed flavors.
+    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        let merged = match (expr1.into_kind(), expr2.into_kind()) {
+            (
+                HirKind::Class(Unicode(mut lhs)),
+                HirKind::Class(Unicode(rhs)),
+            ) => {
+                lhs.union(&rhs);
+                Unicode(lhs)
+            }
+            (
+                HirKind::Class(Bytes(mut lhs)),
+                HirKind::Class(Bytes(rhs)),
+            ) => {
+                lhs.union(&rhs);
+                Bytes(lhs)
+            }
+            _ => panic!("cannot union non-class Hir exprs"),
+        };
+        Hir::class(merged)
+    }
+
+    // Subtract the second class expression from the first. Both must be of
+    // the same flavor (both Unicode or both bytes); panics on anything
+    // else, including mixed flavors.
+    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        let remaining = match (expr1.into_kind(), expr2.into_kind()) {
+            (
+                HirKind::Class(Unicode(mut lhs)),
+                HirKind::Class(Unicode(rhs)),
+            ) => {
+                lhs.difference(&rhs);
+                Unicode(lhs)
+            }
+            (
+                HirKind::Class(Bytes(mut lhs)),
+                HirKind::Class(Bytes(rhs)),
+            ) => {
+                lhs.difference(&rhs);
+                Bytes(lhs)
+            }
+            _ => panic!("cannot difference non-class Hir exprs"),
+        };
+        Hir::class(remaining)
+    }
+
+    // Build an anchor expression. Short alias for `Hir::anchor`, used to
+    // keep the expected values compact.
+    fn hir_anchor(anchor: hir::Anchor) -> Hir {
+        Hir::anchor(anchor)
+    }
+
+    // Build a word boundary expression. Short alias for
+    // `Hir::word_boundary`, used to keep the expected values compact.
+    fn hir_word(wb: hir::WordBoundary) -> Hir {
+        Hir::word_boundary(wb)
+    }
+
+    // Empty patterns and empty sub-expressions (inside groups and on either
+    // side of `|`) translate to `Hir::empty()` in the matching position.
+    #[test]
+    fn empty() {
+        assert_eq!(t(""), Hir::empty());
+        // A flag directive on its own contributes no expression.
+        assert_eq!(t("(?i)"), Hir::empty());
+        assert_eq!(t("()"), hir_group(1, Hir::empty()));
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
+        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+        assert_eq!(t("()|()"), hir_alt(vec![
+            hir_group(1, Hir::empty()),
+            hir_group(2, Hir::empty()),
+        ]));
+        assert_eq!(t("(|b)"), hir_group(1, hir_alt(vec![
+            Hir::empty(),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("(a|)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            Hir::empty(),
+        ])));
+        assert_eq!(t("(a||c)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            Hir::empty(),
+            hir_lit("c"),
+        ])));
+        assert_eq!(t("(||)"), hir_group(1, hir_alt(vec![
+            Hir::empty(),
+            Hir::empty(),
+            Hir::empty(),
+        ])));
+    }
+
+    // Literals translate to Unicode literals, or to byte literals for
+    // non-ASCII bytes when Unicode mode is off and invalid UTF-8 is allowed.
+    #[test]
+    fn literal() {
+        assert_eq!(t("a"), hir_lit("a"));
+        assert_eq!(t("(?-u)a"), hir_lit("a"));
+        assert_eq!(t("☃"), hir_lit("☃"));
+        assert_eq!(t("abcd"), hir_lit("abcd"));
+
+        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+        // Non-raw string: Rust itself turns `\x61` into `a` before the
+        // pattern reaches the parser.
+        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+        // Raw strings: the regex parser sees the `\x..` escape.
+        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+        // `t_err` disallows invalid UTF-8, so with Unicode mode off neither
+        // a non-ASCII code point nor a non-ASCII byte is accepted.
+        assert_eq!(t_err("(?-u)☃"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
+        });
+        assert_eq!(t_err(r"(?-u)\xFF"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
+        });
+    }
+
+    // Under `(?i)`, a literal with case variants becomes a class holding
+    // every variant; literals without variants stay literals.
+    #[test]
+    fn literal_case_insensitive() {
+        // Expected Unicode/byte classes holding one upper/lower case pair.
+        let ufold = |upper: char, lower: char| {
+            hir_uclass(&[(upper, upper), (lower, lower)])
+        };
+        let bfold = |upper: u8, lower: u8| {
+            hir_bclass(&[(upper, upper), (lower, lower)])
+        };
+
+        assert_eq!(t("(?i)a"), ufold('A', 'a'));
+        assert_eq!(t("(?i:a)"), hir_group_nocap(ufold('A', 'a')));
+        assert_eq!(t("a(?i)a(?-i)a"), hir_cat(vec![
+            hir_lit("a"),
+            ufold('A', 'a'),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i)ab@c"), hir_cat(vec![
+            ufold('A', 'a'),
+            ufold('B', 'b'),
+            hir_lit("@"),
+            ufold('C', 'c'),
+        ]));
+        assert_eq!(t("(?i)β"), hir_uclass(&[
+            ('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),
+        ]));
+
+        assert_eq!(t("(?i-u)a"), bfold(b'A', b'a'));
+        assert_eq!(t("(?-u)a(?i)a(?-i)a"), hir_cat(vec![
+            hir_lit("a"),
+            bfold(b'A', b'a'),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i-u)ab@c"), hir_cat(vec![
+            bfold(b'A', b'a'),
+            bfold(b'B', b'b'),
+            hir_lit("@"),
+            bfold(b'C', b'c'),
+        ]));
+
+        assert_eq!(t_bytes("(?i-u)a"), bfold(b'A', b'a'));
+        assert_eq!(t_bytes("(?i-u)\x61"), bfold(b'A', b'a'));
+        assert_eq!(t_bytes(r"(?i-u)\x61"), bfold(b'A', b'a'));
+        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(t_err("(?i-u)β"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
+        });
+    }
+
+    // `.` translates to a class of everything except `\n` (0x0A), or of
+    // everything at all under `(?s)`; Unicode or bytes depending on `u`.
+    #[test]
+    fn dot() {
+        assert_eq!(t("."), hir_uclass(&[
+            ('\0', '\t'),
+            ('\x0B', '\u{10FFFF}'),
+        ]));
+        assert_eq!(t("(?s)."), hir_uclass(&[
+            ('\0', '\u{10FFFF}'),
+        ]));
+        assert_eq!(t_bytes("(?-u)."), hir_bclass(&[
+            (b'\0', b'\t'),
+            (b'\x0B', b'\xFF'),
+        ]));
+        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[
+            (b'\0', b'\xFF'),
+        ]));
+
+        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+        assert_eq!(t_err("(?-u)."), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
+        });
+        assert_eq!(t_err("(?s-u)."), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
+        });
+    }
+
+    // Anchors and word boundaries: `(?m)` turns `^`/`$` into line anchors
+    // but leaves `\A`/`\z` alone, and `(?-u)` selects ASCII word boundaries.
+    #[test]
+    fn assertions() {
+        let anchors = vec![
+            ("^", hir::Anchor::StartText),
+            ("$", hir::Anchor::EndText),
+            (r"\A", hir::Anchor::StartText),
+            (r"\z", hir::Anchor::EndText),
+            ("(?m)^", hir::Anchor::StartLine),
+            ("(?m)$", hir::Anchor::EndLine),
+            (r"(?m)\A", hir::Anchor::StartText),
+            (r"(?m)\z", hir::Anchor::EndText),
+        ];
+        for (pattern, anchor) in anchors {
+            assert_eq!(t(pattern), hir_anchor(anchor));
+        }
+
+        let boundaries = vec![
+            (r"\b", hir::WordBoundary::Unicode),
+            (r"\B", hir::WordBoundary::UnicodeNegate),
+            (r"(?-u)\b", hir::WordBoundary::Ascii),
+            (r"(?-u)\B", hir::WordBoundary::AsciiNegate),
+        ];
+        for (pattern, boundary) in boundaries {
+            assert_eq!(t(pattern), hir_word(boundary));
+        }
+    }
+
+    // Capturing groups (named or not) are numbered from 1 in order of
+    // appearance; non-capturing groups do not consume an index.
+    #[test]
+    fn group() {
+        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
+        assert_eq!(t("(a)(b)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group(2, hir_lit("b")),
+        ]));
+        assert_eq!(t("(a)|(b)"), hir_alt(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group(2, hir_lit("b")),
+        ]));
+        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
+        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
+        assert_eq!(t("(?P<foo>a)(?P<bar>b)"), hir_cat(vec![
+            hir_group_name(1, "foo", hir_lit("a")),
+            hir_group_name(2, "bar", hir_lit("b")),
+        ]));
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
+        // The non-capturing group is skipped when numbering.
+        assert_eq!(t("(?:a)(b)"), hir_cat(vec![
+            hir_group_nocap(hir_lit("a")),
+            hir_group(1, hir_lit("b")),
+        ]));
+        assert_eq!(t("(a)(?:b)(c)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group_nocap(hir_lit("b")),
+            hir_group(2, hir_lit("c")),
+        ]));
+        // Named and unnamed capturing groups share one index sequence.
+        assert_eq!(t("(a)(?P<foo>b)(c)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group_name(2, "foo", hir_lit("b")),
+            hir_group(3, hir_lit("c")),
+        ]));
+    }
+
+    // Flags set inside a group stop applying at the end of that group,
+    // while flags set by a bare directive apply to what follows it.
+    #[test]
+    fn flags() {
+        // The literal `a`, and `a` as seen under case insensitivity.
+        let a = || hir_lit("a");
+        let a_fold = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
+
+        assert_eq!(t("(?i:a)a"), hir_cat(vec![
+            hir_group_nocap(a_fold()),
+            a(),
+        ]));
+        assert_eq!(t("(?i-u:a)β"), hir_cat(vec![
+            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+            hir_lit("β"),
+        ]));
+        assert_eq!(t("(?i)(?-i:a)a"), hir_cat(vec![
+            hir_group_nocap(a()),
+            a_fold(),
+        ]));
+        assert_eq!(t("(?im)a^"), hir_cat(vec![
+            a_fold(),
+            hir_anchor(hir::Anchor::StartLine),
+        ]));
+        assert_eq!(t("(?im)a^(?i-m)a^"), hir_cat(vec![
+            a_fold(),
+            hir_anchor(hir::Anchor::StartLine),
+            a_fold(),
+            hir_anchor(hir::Anchor::StartText),
+        ]));
+        assert_eq!(t("(?U)a*a*?(?-U)a*a*?"), hir_cat(vec![
+            hir_star(false, a()),
+            hir_star(true, a()),
+            hir_star(true, a()),
+            hir_star(false, a()),
+        ]));
+        assert_eq!(t("(?:a(?i)a)a"), hir_cat(vec![
+            hir_group_nocap(hir_cat(vec![a(), a_fold()])),
+            a(),
+        ]));
+        assert_eq!(t("(?i)(?:a(?-i)a)a"), hir_cat(vec![
+            hir_group_nocap(hir_cat(vec![a_fold(), a()])),
+            a_fold(),
+        ]));
+    }
+
+    // Escaped meta characters translate to the plain characters themselves.
+    #[test]
+    fn escape() {
+        let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
+        let unescaped = r"\.+*?()|[]{}^$#";
+        assert_eq!(t(pattern), hir_lit(unescaped));
+    }
+
+    // Repetition operators, their lazy (`?`-suffixed) forms, counted
+    // ranges, and how repetition binds relative to concatenation,
+    // alternation and groups.
+    #[test]
+    fn repetition() {
+        use hir::RepetitionRange::{AtLeast, Bounded, Exactly};
+
+        let a = || hir_lit("a");
+        let b = || hir_lit("b");
+
+        assert_eq!(t("a?"), hir_quest(true, a()));
+        assert_eq!(t("a*"), hir_star(true, a()));
+        assert_eq!(t("a+"), hir_plus(true, a()));
+        assert_eq!(t("a??"), hir_quest(false, a()));
+        assert_eq!(t("a*?"), hir_star(false, a()));
+        assert_eq!(t("a+?"), hir_plus(false, a()));
+
+        assert_eq!(t("a{1}"), hir_range(true, Exactly(1), a()));
+        assert_eq!(t("a{1,}"), hir_range(true, AtLeast(1), a()));
+        assert_eq!(t("a{1,2}"), hir_range(true, Bounded(1, 2), a()));
+        assert_eq!(t("a{1}?"), hir_range(false, Exactly(1), a()));
+        assert_eq!(t("a{1,}?"), hir_range(false, AtLeast(1), a()));
+        assert_eq!(t("a{1,2}?"), hir_range(false, Bounded(1, 2), a()));
+
+        // The operator applies to the nearest atom only.
+        assert_eq!(t("ab?"), hir_cat(vec![a(), hir_quest(true, b())]));
+        assert_eq!(
+            t("(ab)?"),
+            hir_quest(true, hir_group(1, hir_cat(vec![a(), b()]))),
+        );
+        assert_eq!(t("a|b?"), hir_alt(vec![a(), hir_quest(true, b())]));
+    }
+
+    // Concatenation and alternation, on their own and nested inside
+    // capturing groups.
+    #[test]
+    fn cat_alt() {
+        assert_eq!(t("(ab)"), hir_group(1, hir_cat(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("a|b"), hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ]));
+        assert_eq!(t("a|b|c"), hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+            hir_lit("c"),
+        ]));
+        // Each alternate is itself a concatenation of its literals.
+        assert_eq!(t("ab|bc|cd"), hir_alt(vec![
+            hir_lit("ab"),
+            hir_lit("bc"),
+            hir_lit("cd"),
+        ]));
+        assert_eq!(t("(a|b)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("(a|b|c)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+            hir_lit("c"),
+        ])));
+        assert_eq!(t("(ab|bc|cd)"), hir_group(1, hir_alt(vec![
+            hir_lit("ab"),
+            hir_lit("bc"),
+            hir_lit("cd"),
+        ])));
+        // Nested groups are numbered by the position of their opening
+        // parenthesis.
+        assert_eq!(t("(ab|(bc|(cd)))"), hir_group(1, hir_alt(vec![
+            hir_lit("ab"),
+            hir_group(2, hir_alt(vec![
+                hir_lit("bc"),
+                hir_group(3, hir_lit("cd")),
+            ])),
+        ])));
+    }
+
+    // Every `[[:name:]]` class translates to the table `ascii_class` holds
+    // for it, as a Unicode class by default and as a byte class under
+    // `(?-u)`; negated byte classes are rejected when invalid UTF-8 is
+    // disallowed.
+    #[test]
+    fn class_ascii() {
+        // Each bracket name paired with the class kind it denotes.
+        let named = vec![
+            ("alnum", ast::ClassAsciiKind::Alnum),
+            ("alpha", ast::ClassAsciiKind::Alpha),
+            ("ascii", ast::ClassAsciiKind::Ascii),
+            ("blank", ast::ClassAsciiKind::Blank),
+            ("cntrl", ast::ClassAsciiKind::Cntrl),
+            ("digit", ast::ClassAsciiKind::Digit),
+            ("graph", ast::ClassAsciiKind::Graph),
+            ("lower", ast::ClassAsciiKind::Lower),
+            ("print", ast::ClassAsciiKind::Print),
+            ("punct", ast::ClassAsciiKind::Punct),
+            ("space", ast::ClassAsciiKind::Space),
+            ("upper", ast::ClassAsciiKind::Upper),
+            ("word", ast::ClassAsciiKind::Word),
+            ("xdigit", ast::ClassAsciiKind::Xdigit),
+        ];
+        for (name, kind) in named {
+            let pattern = format!("[[:{}:]]", name);
+            assert_eq!(t(&pattern), hir_uclass(ascii_class(&kind)));
+        }
+
+        let lower = ast::ClassAsciiKind::Lower;
+        assert_eq!(
+            t("[[:^lower:]]"),
+            hir_negate(hir_uclass(ascii_class(&lower))));
+        assert_eq!(
+            t("(?i)[[:lower:]]"),
+            hir_uclass(&[
+                ('A', 'Z'), ('a', 'z'),
+                ('\u{17F}', '\u{17F}'),
+                ('\u{212A}', '\u{212A}'),
+            ]));
+
+        assert_eq!(
+            t("(?-u)[[:lower:]]"),
+            hir_bclass_from_char(ascii_class(&lower)));
+        assert_eq!(
+            t("(?i-u)[[:lower:]]"),
+            hir_case_fold(hir_bclass_from_char(ascii_class(&lower))));
+
+        assert_eq!(t_err("(?-u)[[:^lower:]]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(6, 1, 7), Position::new(16, 1, 17)),
+        });
+        assert_eq!(t_err("(?i-u)[[:^lower:]]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(7, 1, 8), Position::new(17, 1, 18)),
+        });
+    }
+
+    // `\d`, `\s`, `\w` and their negations, in Unicode and ASCII-only mode.
+    // Case insensitivity is expected to make no difference to any of them.
+    #[test]
+    fn class_perl() {
+        // Expected Unicode classes.
+        let udigit = || hir_uclass_query(ClassQuery::Binary("digit"));
+        let uspace = || hir_uclass_query(ClassQuery::Binary("space"));
+        let uword = || hir_uclass_perl_word();
+        // Expected ASCII-only (byte) classes.
+        let bdigit = || {
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+        };
+        let bspace = || {
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
+        };
+        let bword = || {
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
+        };
+
+        // Unicode
+        assert_eq!(t(r"\d"), udigit());
+        assert_eq!(t(r"\s"), uspace());
+        assert_eq!(t(r"\w"), uword());
+        assert_eq!(t(r"(?i)\d"), udigit());
+        assert_eq!(t(r"(?i)\s"), uspace());
+        assert_eq!(t(r"(?i)\w"), uword());
+
+        // Unicode, negated
+        assert_eq!(t(r"\D"), hir_negate(udigit()));
+        assert_eq!(t(r"\S"), hir_negate(uspace()));
+        assert_eq!(t(r"\W"), hir_negate(uword()));
+        assert_eq!(t(r"(?i)\D"), hir_negate(udigit()));
+        assert_eq!(t(r"(?i)\S"), hir_negate(uspace()));
+        assert_eq!(t(r"(?i)\W"), hir_negate(uword()));
+
+        // ASCII only
+        assert_eq!(t(r"(?-u)\d"), bdigit());
+        assert_eq!(t(r"(?-u)\s"), bspace());
+        assert_eq!(t(r"(?-u)\w"), bword());
+        assert_eq!(t(r"(?i-u)\d"), bdigit());
+        assert_eq!(t(r"(?i-u)\s"), bspace());
+        assert_eq!(t(r"(?i-u)\w"), bword());
+
+        // ASCII only, negated
+        assert_eq!(t(r"(?-u)\D"), hir_negate(bdigit()));
+        assert_eq!(t(r"(?-u)\S"), hir_negate(bspace()));
+        assert_eq!(t(r"(?-u)\W"), hir_negate(bword()));
+        assert_eq!(t(r"(?i-u)\D"), hir_negate(bdigit()));
+        assert_eq!(t(r"(?i-u)\S"), hir_negate(bspace()));
+        assert_eq!(t(r"(?i-u)\W"), hir_negate(bword()));
+    }
+
+    #[test]
+    fn class_unicode() {
+        assert_eq!(
+            t(r"\pZ"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\pz"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{se      PaRa ToR}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{gc:Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{gc=Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+
+        assert_eq!(
+            t(r"\PZ"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+        assert_eq!(
+            t(r"\P{separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+        assert_eq!(
+            t(r"\P{gc!=separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+
+        assert_eq!(
+            t(r"\p{Greek}"),
+            hir_uclass_query(ClassQuery::Binary("Greek")));
+        assert_eq!(
+            t(r"(?i)\p{Greek}"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek"))));
+        assert_eq!(
+            t(r"(?i)\P{Greek}"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("Greek")))));
+
+        assert_eq!(
+            t(r"\p{any}"),
+            hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned")));
+        assert_eq!(
+            t(r"\p{ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII")));
+        assert_eq!(
+            t(r"\p{gc:any}"),
+            hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{gc:assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned")));
+        assert_eq!(
+            t(r"\p{gc:ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII")));
+
+        assert_eq!(t_err(r"(?-u)\pZ"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 9)),
+        });
+        assert_eq!(t_err(r"(?-u)\p{Separator}"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(18, 1, 19)),
+        });
+        assert_eq!(t_err(r"\pE"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(3, 1, 4)),
+        });
+        assert_eq!(t_err(r"\p{Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
+        });
+        assert_eq!(t_err(r"\p{gc:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"\p{sc:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"\p{scx:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
+        });
+        assert_eq!(t_err(r"\p{age:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed() {
+        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
+        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(
+            t(r"[\d]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[\pZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[\p{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[^\PZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[^\P{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"(?i)[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"(?i)[^\P{greek}]"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek"))));
+
+        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        assert_eq!(t("(?i)[k]"), hir_uclass(&[
+            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
+        ]));
+        assert_eq!(t("(?i)[β]"), hir_uclass(&[
+            ('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),
+        ]));
+        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[
+            (b'K', b'K'), (b'k', b'k'),
+        ]));
+
+        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
+        assert_eq!(
+            t_bytes("(?-u)[^a]"),
+            hir_negate(hir_bclass(&[(b'a', b'a')])));
+        assert_eq!(
+            t(r"[^\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^\pZ]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[^\p{separator}]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"(?i)[^\p{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("greek")))));
+        assert_eq!(
+            t(r"(?i)[\P{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("greek")))));
+
+        // Test some weird cases.
+        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+        assert_eq!(t_err("(?-u)[^a]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
+        });
+        assert_eq!(t_err(r"[^\s\S]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
+        });
+        assert_eq!(t_err(r"(?-u)[^\s\S]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(12, 1, 13)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed_union() {
+        assert_eq!(
+            t("[a-zA-Z]"),
+            hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        assert_eq!(
+            t(r"[a\pZb]"),
+            hir_union(
+                hir_uclass(&[('a', 'b')]),
+                hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::Binary("greek")),
+                hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator")))));
+        assert_eq!(
+            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
+                    hir_union(
+                        hir_uclass_query(ClassQuery::Binary("greek")),
+                        hir_uclass_query(ClassQuery::Binary("separator"))))));
+
+        assert_eq!(
+            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))))));
+        assert_eq!(
+            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))))));
+        assert_eq!(
+            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator")))))));
+    }
+
+    #[test]
+    fn class_bracketed_nested() {
+        assert_eq!(
+            t(r"[a[^c]]"),
+            hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(
+            t(r"[a-b[^c]]"),
+            hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(
+            t(r"[a-c[^c]]"),
+            hir_negate(hir_uclass(&[])));
+
+        assert_eq!(
+            t(r"[^a[^c]]"),
+            hir_uclass(&[('c', 'c')]));
+        assert_eq!(
+            t(r"[^a-b[^c]]"),
+            hir_uclass(&[('c', 'c')]));
+
+        assert_eq!(
+            t(r"(?i)[a[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))));
+        assert_eq!(
+            t(r"(?i)[a-b[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))));
+
+        assert_eq!(
+            t(r"(?i)[^a[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')]));
+        assert_eq!(
+            t(r"(?i)[^a-b[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')]));
+
+        assert_eq!(t_err(r"[^a-c[^c]]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"(?i)[^a-c[^c]]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(4, 1, 5), Position::new(14, 1, 15)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed_intersect() {
+        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+        assert_eq!(
+            t("(?i)[abc&&b-c]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[abc&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_uclass(&[('c', 'x')])));
+        assert_eq!(
+            t("(?i)[c-da-b&&a-d]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')])));
+        assert_eq!(
+            t("(?i)[a-d&&c-da-b]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')])));
+
+        assert_eq!(
+            t("(?i-u)[abc&&b-c]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[abc&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_bclass(&[(b'c', b'x')])));
+        assert_eq!(
+            t("(?i-u)[c-da-b&&a-d]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')])));
+        assert_eq!(
+            t("(?i-u)[a-d&&c-da-b]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')])));
+
+        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+        // `^` is also allowed to be unescaped after `&&`.
+        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+        // `]` needs to be escaped after `&&` since it's not at start of class.
+        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+        // Test precedence.
+        assert_eq!(
+            t(r"[a-w&&[^c-g]z]"),
+            hir_uclass(&[('a', 'b'), ('h', 'w')]));
+    }
+
+    #[test]
+    fn class_bracketed_intersect_negate() {
+        assert_eq!(
+            t(r"[^\w&&\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^[a-z&&a-c]]"),
+            hir_negate(hir_uclass(&[('a', 'c')])));
+        assert_eq!(
+            t(r"[^[\w&&\d]]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^[^\w&&\d]]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_uclass_perl_word()));
+
+        assert_eq!(
+            t_bytes(r"(?-u)[^\w&&\d]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+            hir_negate(hir_bclass(&[(b'a', b'c')])));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[\w&&\d]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[^\w&&\d]]"),
+            hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit)));
+        assert_eq!(
+            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word))));
+    }
+
+    #[test]
+    fn class_bracketed_difference() {
+        assert_eq!(
+            t(r"[\pL--[:ascii:]]"),
+            hir_difference(
+                hir_uclass_query(ClassQuery::Binary("letter")),
+                hir_uclass(&[('\0', '\x7F')])));
+
+        assert_eq!(
+            t(r"(?-u)[[:alpha:]--[:lower:]]"),
+            hir_bclass(&[(b'A', b'Z')]));
+    }
+
+    #[test]
+    fn class_bracketed_symmetric_difference() {
+        assert_eq!(
+            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+            hir_uclass(&[
+                ('\u{0342}', '\u{0342}'),
+                ('\u{0345}', '\u{0345}'),
+                ('\u{1DC0}', '\u{1DC1}'),
+            ]));
+        assert_eq!(
+            t(r"[a-g~~c-j]"),
+            hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+        assert_eq!(
+            t(r"(?-u)[a-g~~c-j]"),
+            hir_bclass(&[(b'a', b'b'), (b'h', b'j')]));
+    }
+
+    #[test]
+    fn ignore_whitespace() {
+        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x # comment
+{ # comment
+    53 # comment
+} #comment"), hir_lit("S"));
+
+        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x # comment
+        53 # comment"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+        assert_eq!(t(r"(?x)\p # comment
+{ # comment
+    Separator # comment
+} # comment"), hir_uclass_query(ClassQuery::Binary("separator")));
+
+        assert_eq!(t(r"(?x)a # comment
+{ # comment
+    5 # comment
+    , # comment
+    10 # comment
+} # comment"),
+            hir_range(
+                true, hir::RepetitionRange::Bounded(5, 10), hir_lit("a")));
+
+        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
+    }
+
+    #[test]
+    fn analysis_is_always_utf8() {
+        // Positive examples.
+        assert!(t_bytes(r"a").is_always_utf8());
+        assert!(t_bytes(r"ab").is_always_utf8());
+        assert!(t_bytes(r"(?-u)a").is_always_utf8());
+        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
+        assert!(t_bytes(r"\xFF").is_always_utf8());
+        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
+        assert!(t_bytes(r"[^a]").is_always_utf8());
+        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
+        assert!(t_bytes(r"\b").is_always_utf8());
+        assert!(t_bytes(r"\B").is_always_utf8());
+
+        // Negative examples.
+        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\b").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
+    }
+
+    #[test]
+    fn analysis_is_all_assertions() {
+        // Positive examples.
+        assert!(t(r"\b").is_all_assertions());
+        assert!(t(r"\B").is_all_assertions());
+        assert!(t(r"^").is_all_assertions());
+        assert!(t(r"$").is_all_assertions());
+        assert!(t(r"\A").is_all_assertions());
+        assert!(t(r"\z").is_all_assertions());
+        assert!(t(r"$^\z\A\b\B").is_all_assertions());
+        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
+        assert!(t(r"^$|$^").is_all_assertions());
+        assert!(t(r"((\b)+())*^").is_all_assertions());
+
+        // Negative examples.
+        assert!(!t(r"^a").is_all_assertions());
+    }
+
+    #[test]
+    fn analysis_is_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_anchored_start());
+        assert!(t(r"$").is_anchored_end());
+
+        assert!(t(r"^^").is_anchored_start());
+        assert!(t(r"$$").is_anchored_end());
+
+        assert!(t(r"^$").is_anchored_start());
+        assert!(t(r"^$").is_anchored_end());
+
+        assert!(t(r"^foo").is_anchored_start());
+        assert!(t(r"foo$").is_anchored_end());
+
+        assert!(t(r"^foo|^bar").is_anchored_start());
+        assert!(t(r"foo$|bar$").is_anchored_end());
+
+        assert!(t(r"^(foo|bar)").is_anchored_start());
+        assert!(t(r"(foo|bar)$").is_anchored_end());
+
+        assert!(t(r"^+").is_anchored_start());
+        assert!(t(r"$+").is_anchored_end());
+        assert!(t(r"^++").is_anchored_start());
+        assert!(t(r"$++").is_anchored_end());
+        assert!(t(r"(^)+").is_anchored_start());
+        assert!(t(r"($)+").is_anchored_end());
+
+        assert!(t(r"$^").is_anchored_start());
+        assert!(t(r"$^").is_anchored_end());
+        assert!(t(r"$^|^$").is_anchored_start());
+        assert!(t(r"$^|^$").is_anchored_end());
+
+        assert!(t(r"\b^").is_anchored_start());
+        assert!(t(r"$\b").is_anchored_end());
+        assert!(t(r"^(?m:^)").is_anchored_start());
+        assert!(t(r"(?m:$)$").is_anchored_end());
+        assert!(t(r"(?m:^)^").is_anchored_start());
+        assert!(t(r"$(?m:$)").is_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_anchored_start());
+        assert!(!t(r"(?m)$").is_anchored_end());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
+
+        assert!(!t(r"a^").is_anchored_start());
+        assert!(!t(r"$a").is_anchored_start());
+        // A literal on either side of the anchor also defeats end anchoring.
+        assert!(!t(r"a^").is_anchored_end());
+        assert!(!t(r"$a").is_anchored_end());
+
+        assert!(!t(r"^foo|bar").is_anchored_start());
+        assert!(!t(r"foo|bar$").is_anchored_end());
+
+        assert!(!t(r"^*").is_anchored_start());
+        assert!(!t(r"$*").is_anchored_end());
+        assert!(!t(r"^*+").is_anchored_start());
+        assert!(!t(r"$*+").is_anchored_end());
+        assert!(!t(r"^+*").is_anchored_start());
+        assert!(!t(r"$+*").is_anchored_end());
+        assert!(!t(r"(^)*").is_anchored_start());
+        assert!(!t(r"($)*").is_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_any_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_any_anchored_start());
+        assert!(t(r"$").is_any_anchored_end());
+        assert!(t(r"\A").is_any_anchored_start());
+        assert!(t(r"\z").is_any_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_any_anchored_start());
+        assert!(!t(r"(?m)$").is_any_anchored_end());
+        assert!(!t(r"$").is_any_anchored_start());
+        assert!(!t(r"^").is_any_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_match_empty() {
+        // Positive examples.
+        assert!(t(r"").is_match_empty());
+        assert!(t(r"()").is_match_empty());
+        assert!(t(r"()*").is_match_empty());
+        assert!(t(r"()+").is_match_empty());
+        assert!(t(r"()?").is_match_empty());
+        assert!(t(r"a*").is_match_empty());
+        assert!(t(r"a?").is_match_empty());
+        assert!(t(r"a{0}").is_match_empty());
+        assert!(t(r"a{0,}").is_match_empty());
+        assert!(t(r"a{0,1}").is_match_empty());
+        assert!(t(r"a{0,10}").is_match_empty());
+        assert!(t(r"\pL*").is_match_empty());
+        assert!(t(r"a*|b").is_match_empty());
+        assert!(t(r"b|a*").is_match_empty());
+        assert!(t(r"a*a?(abcd)*").is_match_empty());
+        assert!(t(r"^").is_match_empty());
+        assert!(t(r"$").is_match_empty());
+        assert!(t(r"(?m)^").is_match_empty());
+        assert!(t(r"(?m)$").is_match_empty());
+        assert!(t(r"\A").is_match_empty());
+        assert!(t(r"\z").is_match_empty());
+        assert!(t(r"\B").is_match_empty());
+        assert!(t(r"(?-u)\B").is_match_empty());
+
+        // Negative examples.
+        assert!(!t(r"a+").is_match_empty());
+        assert!(!t(r"a{1}").is_match_empty());
+        assert!(!t(r"a{1,}").is_match_empty());
+        assert!(!t(r"a{1,2}").is_match_empty());
+        assert!(!t(r"a{1,10}").is_match_empty());
+        assert!(!t(r"b|a").is_match_empty());
+        assert!(!t(r"a*a+(abcd)*").is_match_empty());
+        assert!(!t(r"\b").is_match_empty());
+        assert!(!t(r"(?-u)\b").is_match_empty());
+    }
+}
diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs
new file mode 100644
index 0000000000..716a96d9b4
--- /dev/null
+++ b/regex-syntax/src/hir/visitor.rs
@@ -0,0 +1,222 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use hir::{self, Hir, HirKind};
+
+/// A trait for visiting the high-level IR (HIR) in depth-first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on a high-level intermediate representation of a regular
+/// expression without necessarily using recursion. In particular, this permits
+/// callers to do case analysis with constant stack usage, which can be
+/// important since the size of an HIR may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+pub trait Visitor {
+    /// The result of visiting an HIR.
+    type Output;
+    /// An error that visiting an HIR might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the HIR or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the HIR.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Hir` before descending into its child
+    /// `Hir` nodes.
+    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Hir` after descending into all of its
+    /// child `Hir` nodes.
+    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an alternation.
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Runs the given `Visitor` over an `Hir` using constant stack space.
+///
+/// Every node in `hir` is visited, and the corresponding methods of the
+/// [`Visitor`](trait.Visitor.html) trait are called along the way.
+///
+/// This is primarily useful for performing case analysis over an `Hir`
+/// without using a stack size proportional to the depth of the `Hir`. The
+/// traversal uses constant stack space instead, at the cost of heap space
+/// proportional to the size of the `Hir`. This may be desirable in cases
+/// where the size of the `Hir` is proportional to end user input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned. Otherwise, the result of the visitor's `finish`
+/// method is returned.
+pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
+    let mut walker = HeapVisitor::new();
+    walker.visit(hir, visitor)
+}
+
+/// HeapVisitor visits every node of an `Hir` iteratively, using constant stack
+/// size and a heap size proportional to the size of the `Hir`.
+struct HeapVisitor<'a> {
+    /// A stack of `(parent node, frame)` pairs. This is roughly analogous to
+    /// the call stack used in a typical recursive visitor.
+    stack: Vec<(&'a Hir, Frame<'a>)>,
+}
+
+/// Represents a single frame of the explicit stack used while performing
+/// structural induction over an `Hir`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a hir::Repetition),
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a hir::Group),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The child nodes after `head` still to be visited (may be empty).
+        tail: &'a [Hir],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The child nodes after `head` still to be visited (may be empty).
+        tail: &'a [Hir],
+    },
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: Vec::new() }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut hir: &'a Hir,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+
+        visitor.start();
+        loop {
+            try!(visitor.visit_pre(hir));
+            if let Some(frame) = self.induct(hir) {
+                // An inductive case: remember the parent and descend into
+                // its first child before doing anything else.
+                let parent = hir;
+                hir = frame.child();
+                self.stack.push((parent, frame));
+                continue;
+            }
+            // A base case has no children, so it is post visited right away.
+            try!(visitor.visit_post(hir));
+
+            // Unwind the simulated call stack until it is exhausted or a
+            // frame still has a child left to visit.
+            loop {
+                let (parent, finished) = match self.stack.pop() {
+                    Some(entry) => entry,
+                    None => return visitor.finish(),
+                };
+                // Concatenations and alternations may have further children.
+                if let Some(next) = self.pop(finished) {
+                    if let Frame::Alternation {..} = next {
+                        try!(visitor.visit_alternation_in());
+                    }
+                    hir = next.child();
+                    self.stack.push((parent, next));
+                    break;
+                }
+                // Every child of `parent` has been visited, so it can be
+                // post visited now.
+                try!(visitor.visit_post(parent));
+            }
+        }
+    }
+
+    /// Returns the stack frame needed to descend into `hir`, which exists if
+    /// and only if `hir` has at least one child node. Otherwise returns None.
+    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
+        match *hir.kind() {
+            HirKind::Repetition(ref rep) => Some(Frame::Repetition(rep)),
+            HirKind::Group(ref group) => Some(Frame::Group(group)),
+            // A concatenation or alternation without any sub-expressions has
+            // nothing to descend into, so `split_first` yielding `None` makes
+            // it a base case.
+            HirKind::Concat(ref exprs) => {
+                exprs.split_first().map(|(head, tail)| {
+                    Frame::Concat { head, tail }
+                })
+            }
+            HirKind::Alternation(ref exprs) => {
+                exprs.split_first().map(|(head, tail)| {
+                    Frame::Alternation { head, tail }
+                })
+            }
+            // No frame is built for any other kind of expression.
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame once its current child has been visited. If the
+    /// frame has another child to descend into, then the frame for that child
+    /// is returned, otherwise `None` is returned.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            // Repetitions and groups wrap exactly one child, so nothing is
+            // left once that child has been visited.
+            Frame::Repetition(_) | Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => {
+                match tail.split_first() {
+                    None => None,
+                    Some((next, rest)) => Some(Frame::Concat {
+                        head: next,
+                        tail: rest,
+                    }),
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                match tail.split_first() {
+                    None => None,
+                    Some((next, rest)) => Some(Frame::Alternation {
+                        head: next,
+                        tail: rest,
+                    }),
+                }
+            }
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Returns the child HIR node that this frame is currently positioned on,
+    /// i.e., the node to visit next.
+    fn child(&self) -> &'a Hir {
+        match *self {
+            Frame::Repetition(repetition) => &repetition.hir,
+            Frame::Group(grouped) => &grouped.hir,
+            Frame::Concat { head, .. }
+            | Frame::Alternation { head, .. } => head,
+        }
+    }
+}
diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs
index df764594a8..27a1d45fdb 100644
--- a/regex-syntax/src/lib.rs
+++ b/regex-syntax/src/lib.rs
@@ -1,4 +1,4 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -9,1671 +9,114 @@
 // except according to those terms.
 
 /*!
-This crate provides a regular expression parser and an abstract syntax for
-regular expressions. The abstract syntax is defined by the `Expr` type. The
-concrete syntax is enumerated in the
-[`regex`](../regex/index.html#syntax)
-crate documentation.
+This crate provides a robust regular expression parser.
 
-Note that since this crate is first and foremost an implementation detail for
-the `regex` crate, it may experience more frequent breaking changes. It is
-exposed as a separate crate so that others may use it to do analysis on regular
-expressions or even build their own matching engine.
+This crate defines two primary types:
 
-# Example: parsing an expression
+* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression.
+  An abstract syntax corresponds to a *structured representation* of the
+  concrete syntax of a regular expression, where the concrete syntax is the
+  pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
+  can be converted back to the original concrete syntax (modulo some details,
+  like whitespace). To a first approximation, the abstract syntax is complex
+  and difficult to analyze.
+* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation
+  ("HIR" or "high-level IR" for short) of a regular expression. It corresponds
+  to an intermediate state of a regular expression that sits between the
+  abstract syntax and the low level compiled opcodes that are eventually
+  responsible for executing a regular expression search. Given some high-level
+  IR, it is not possible to produce the original concrete syntax (although it
+  is possible to produce an equivalent concrete syntax, it will likely
+  scarcely resemble the original pattern). To a first approximation, the
+  high-level IR is simple and easy to analyze.
 
-Parsing a regular expression can be done with the `Expr::parse` function.
+These two types come with conversion routines:
 
-```rust
-use regex_syntax::Expr;
+* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete
+  syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html).
+* A [`hir::translate::Translator`](hir/translate/struct.Translator.html)
+  converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html).
 
-assert_eq!(Expr::parse(r"ab|yz").unwrap(), Expr::Alternate(vec![
-    Expr::Literal { chars: vec!['a', 'b'], casei: false },
-    Expr::Literal { chars: vec!['y', 'z'], casei: false },
-]));
-```
+As a convenience, the above two conversion routines are combined into one via
+the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first
+convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
 
-# Example: inspecting an error
 
-The parser in this crate provides very detailed error values. For example,
-if an invalid character class range is given:
+# Example
 
-```rust
-use regex_syntax::{Expr, ErrorKind};
+This example shows how to parse a pattern string into its HIR:
 
-let err = Expr::parse(r"[z-a]").unwrap_err();
-assert_eq!(err.position(), 4);
-assert_eq!(err.kind(), &ErrorKind::InvalidClassRange {
-    start: 'z',
-    end: 'a',
-});
 ```
+use regex_syntax::Parser;
+use regex_syntax::hir::{self, Hir};
 
-Or unbalanced parentheses:
-
-```rust
-use regex_syntax::{Expr, ErrorKind};
-
-let err = Expr::parse(r"ab(cd").unwrap_err();
-assert_eq!(err.position(), 2);
-assert_eq!(err.kind(), &ErrorKind::UnclosedParen);
+let hir = Parser::new().parse("a|b").unwrap();
+assert_eq!(hir, Hir::alternation(vec![
+    Hir::literal(hir::Literal::Unicode('a')),
+    Hir::literal(hir::Literal::Unicode('b')),
+]));
 ```
-*/
-
-#![deny(missing_docs)]
-#![cfg_attr(test, deny(warnings))]
-
-#[cfg(test)] extern crate quickcheck;
-#[cfg(test)] extern crate rand;
-
-mod literals;
-mod parser;
-mod unicode;
-
-use std::ascii;
-use std::char;
-use std::cmp::{Ordering, max, min};
-use std::fmt;
-use std::iter::IntoIterator;
-use std::ops::Deref;
-use std::result;
-use std::slice;
-use std::u8;
-use std::vec;
-
-use unicode::case_folding;
-
-use self::Expr::*;
-use self::Repeater::*;
-
-use parser::{Flags, Parser};
-
-pub use literals::{Literals, Lit};
-
-/// A regular expression abstract syntax tree.
-///
-/// An `Expr` represents the abstract syntax of a regular expression.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum Expr {
-    /// An empty regex (which never matches any text).
-    Empty,
-    /// A sequence of one or more literal characters to be matched.
-    Literal {
-        /// The characters.
-        chars: Vec<char>,
-        /// Whether to match case insensitively.
-        casei: bool,
-    },
-    /// A sequence of one or more literal bytes to be matched.
-    LiteralBytes {
-        /// The bytes.
-        bytes: Vec<u8>,
-        /// Whether to match case insensitively.
-        ///
-        /// The interpretation of "case insensitive" in this context is
-        /// ambiguous since `bytes` can be arbitrary. However, a good heuristic
-        /// is to assume that the bytes are ASCII-compatible and do simple
-        /// ASCII case folding.
-        casei: bool,
-    },
-    /// Match any character.
-    AnyChar,
-    /// Match any character, excluding new line (`0xA`).
-    AnyCharNoNL,
-    /// Match any byte.
-    AnyByte,
-    /// Match any byte, excluding new line (`0xA`).
-    AnyByteNoNL,
-    /// A character class.
-    Class(CharClass),
-    /// A character class with byte ranges only.
-    ClassBytes(ByteClass),
-    /// Match the start of a line or beginning of input.
-    StartLine,
-    /// Match the end of a line or end of input.
-    EndLine,
-    /// Match the beginning of input.
-    StartText,
-    /// Match the end of input.
-    EndText,
-    /// Match a word boundary (word character on one side and a non-word
-    /// character on the other).
-    WordBoundary,
-    /// Match a position that is not a word boundary (word or non-word
-    /// characters on both sides).
-    NotWordBoundary,
-    /// Match an ASCII word boundary.
-    WordBoundaryAscii,
-    /// Match a position that is not an ASCII word boundary.
-    NotWordBoundaryAscii,
-    /// A group, possibly non-capturing.
-    Group {
-        /// The expression inside the group.
-        e: Box<Expr>,
-        /// The capture index (starting at `1`) only for capturing groups.
-        i: Option<usize>,
-        /// The capture name, only for capturing named groups.
-        name: Option<String>,
-    },
-    /// A repeat operator (`?`, `*`, `+` or `{m,n}`).
-    Repeat {
-        /// The expression to be repeated. Limited to literals, `.`, classes
-        /// or grouped expressions.
-        e: Box<Expr>,
-        /// The type of repeat operator used.
-        r: Repeater,
-        /// Whether the repeat is greedy (match the most) or not (match the
-        /// least).
-        greedy: bool,
-    },
-    /// A concatenation of expressions. Must be matched one after the other.
-    ///
-    /// N.B. A concat expression can only appear at the top-level or
-    /// immediately inside a group expression.
-    Concat(Vec<Expr>),
-    /// An alternation of expressions. Only one must match.
-    ///
-    /// N.B. An alternate expression can only appear at the top-level or
-    /// immediately inside a group expression.
-    Alternate(Vec<Expr>),
-}
-
-type CaptureIndex = Option<usize>;
-
-type CaptureName = Option<String>;
-
-/// The type of a repeat operator expression.
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum Repeater {
-    /// Match zero or one (`?`).
-    ZeroOrOne,
-    /// Match zero or more (`*`).
-    ZeroOrMore,
-    /// Match one or more (`+`).
-    OneOrMore,
-    /// Match for at least `min` and at most `max` (`{m,n}`).
-    ///
-    /// When `max` is `None`, there is no upper bound on the number of matches.
-    Range {
-        /// Lower bound on the number of matches.
-        min: u32,
-        /// Optional upper bound on the number of matches.
-        max: Option<u32>,
-    },
-}
-
-impl Repeater {
-    /// Returns true if and only if this repetition can match the empty string.
-    fn matches_empty(&self) -> bool {
-        use self::Repeater::*;
-        match *self {
-            ZeroOrOne => true,
-            ZeroOrMore => true,
-            OneOrMore => false,
-            Range { min, .. } => min == 0,
-        }
-    }
-}
-
-/// A character class.
-///
-/// A character class has a canonical format that the parser guarantees. Its
-/// canonical format is defined by the following invariants:
-///
-/// 1. Given any Unicode scalar value, it is matched by *at most* one character
-///    range in a canonical character class.
-/// 2. Every adjacent character range is separated by at least one Unicode
-///    scalar value.
-/// 3. Given any pair of character ranges `r1` and `r2`, if
-///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
-///    character class.
-///
-/// In sum, any `CharClass` produced by this crate's parser is a sorted
-/// sequence of non-overlapping ranges. This makes it possible to test whether
-/// a character is matched by a class with a binary search.
-///
-/// If the case insensitive flag was set when parsing a character class, then
-/// simple case folding is done automatically. For example, `(?i)[a-c]` is
-/// automatically translated to `[a-cA-C]`.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct CharClass {
-    ranges: Vec<ClassRange>,
-}
-
-/// A single inclusive range in a character class.
-///
-/// Since range boundaries are defined by Unicode scalar values, the boundaries
-/// can never be in the open interval `(0xD7FF, 0xE000)`. However, a range may
-/// *cover* codepoints that are not scalar values.
-///
-/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
-/// for testing whether a character is contained inside a given range.
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
-pub struct ClassRange {
-    /// The start character of the range.
-    ///
-    /// This must be less than or equal to `end`.
-    pub start: char,
-
-    /// The end character of the range.
-    ///
-    /// This must be greater than or equal to `start`.
-    pub end: char,
-}
-
-/// A byte class for byte ranges only.
-///
-/// A byte class has a canonical format that the parser guarantees. Its
-/// canonical format is defined by the following invariants:
-///
-/// 1. Given any byte, it is matched by *at most* one byte range in a canonical
-///    character class.
-/// 2. Every adjacent byte range is separated by at least one byte.
-/// 3. Given any pair of byte ranges `r1` and `r2`, if
-///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
-///    character class.
-///
-/// In sum, any `ByteClass` produced by this crate's parser is a sorted
-/// sequence of non-overlapping ranges. This makes it possible to test whether
-/// a byte is matched by a class with a binary search.
-///
-/// If the case insensitive flag was set when parsing a character class,
-/// then simple ASCII-only case folding is done automatically. For example,
-/// `(?i)[a-c]` is automatically translated to `[a-cA-C]`.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct ByteClass {
-    ranges: Vec<ByteRange>,
-}
-
-/// A single inclusive range in a byte class.
-///
-/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
-/// for testing whether a byte is contained inside a given range.
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
-pub struct ByteRange {
-    /// The start byte of the range.
-    ///
-    /// This must be less than or equal to `end`.
-    pub start: u8,
-
-    /// The end byte of the range.
-    ///
-    /// This must be greater than or equal to `end`.
-    pub end: u8,
-}
-
-/// A builder for configuring regular expression parsing.
-///
-/// This allows setting the default values of flags and other options, such
-/// as the maximum nesting depth.
-#[derive(Clone, Debug)]
-pub struct ExprBuilder {
-    flags: Flags,
-    nest_limit: usize,
-}
-
-impl ExprBuilder {
-    /// Create a new builder for configuring expression parsing.
-    ///
-    /// Note that all flags are disabled by default.
-    pub fn new() -> ExprBuilder {
-        ExprBuilder {
-            flags: Flags::default(),
-            nest_limit: 200,
-        }
-    }
-
-    /// Set the default value for the case insensitive (`i`) flag.
-    pub fn case_insensitive(mut self, yes: bool) -> ExprBuilder {
-        self.flags.casei = yes;
-        self
-    }
-
-    /// Set the default value for the multi-line matching (`m`) flag.
-    pub fn multi_line(mut self, yes: bool) -> ExprBuilder {
-        self.flags.multi = yes;
-        self
-    }
-
-    /// Set the default value for the any character (`s`) flag.
-    pub fn dot_matches_new_line(mut self, yes: bool) -> ExprBuilder {
-        self.flags.dotnl = yes;
-        self
-    }
-
-    /// Set the default value for the greedy swap (`U`) flag.
-    pub fn swap_greed(mut self, yes: bool) -> ExprBuilder {
-        self.flags.swap_greed = yes;
-        self
-    }
-
-    /// Set the default value for the ignore whitespace (`x`) flag.
-    pub fn ignore_whitespace(mut self, yes: bool) -> ExprBuilder {
-        self.flags.ignore_space = yes;
-        self
-    }
-
-    /// Set the default value for the Unicode (`u`) flag.
-    ///
-    /// If `yes` is false, then `allow_bytes` is set to true.
-    pub fn unicode(mut self, yes: bool) -> ExprBuilder {
-        self.flags.unicode = yes;
-        if !yes {
-            self.allow_bytes(true)
-        } else {
-            self
-        }
-    }
-
-    /// Whether the parser allows matching arbitrary bytes or not.
-    ///
-    /// When the `u` flag is disabled (either with this builder or in the
-    /// expression itself), the parser switches to interpreting the expression
-    /// as matching arbitrary bytes instead of Unicode codepoints. For example,
-    /// the expression `(?u:\xFF)` matches the *codepoint* `\xFF`, which
-    /// corresponds to the UTF-8 byte sequence `\xCE\xBF`. Conversely,
-    /// `(?-u:\xFF)` matches the *byte* `\xFF`, which is not valid UTF-8.
-    ///
-    /// When `allow_bytes` is disabled (the default), an expression like
-    /// `(?-u:\xFF)` will cause the parser to return an error, since it would
-    /// otherwise match invalid UTF-8. When enabled, it will be allowed.
-    pub fn allow_bytes(mut self, yes: bool) -> ExprBuilder {
-        self.flags.allow_bytes = yes;
-        self
-    }
-
-    /// Set the nesting limit for regular expression parsing.
-    ///
-    /// Regular expressions that nest more than this limit will result in a
-    /// `StackExhausted` error.
-    pub fn nest_limit(mut self, limit: usize) -> ExprBuilder {
-        self.nest_limit = limit;
-        self
-    }
-
-    /// Parse a string as a regular expression using the current configuraiton.
-    pub fn parse(self, s: &str) -> Result<Expr> {
-        Parser::parse(s, self.flags).and_then(|e| e.simplify(self.nest_limit))
-    }
-}
-
-impl Expr {
-    /// Parses a string in a regular expression syntax tree.
-    ///
-    /// This is a convenience method for parsing an expression using the
-    /// default configuration. To tweak parsing options (such as which flags
-    /// are enabled by default), use the `ExprBuilder` type.
-    pub fn parse(s: &str) -> Result<Expr> {
-        ExprBuilder::new().parse(s)
-    }
-
-    /// Returns true iff the expression can be repeated by a quantifier.
-    fn can_repeat(&self) -> bool {
-        match *self {
-            Literal{..} | LiteralBytes{..}
-            | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
-            | Class(_) | ClassBytes(_)
-            | StartLine | EndLine | StartText | EndText
-            | WordBoundary | NotWordBoundary
-            | WordBoundaryAscii | NotWordBoundaryAscii
-            | Group{..}
-            => true,
-            _ => false,
-        }
-    }
-
-    fn simplify(self, nest_limit: usize) -> Result<Expr> {
-        fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
-            match (es.pop(), e) {
-                (None, e) => es.push(e),
-                (Some(Literal { chars: mut chars1, casei: casei1 }),
-                      Literal { chars: chars2, casei: casei2 }) => {
-                    if casei1 == casei2 {
-                        chars1.extend(chars2);
-                        es.push(Literal { chars: chars1, casei: casei1 });
-                    } else {
-                        es.push(Literal { chars: chars1, casei: casei1 });
-                        es.push(Literal { chars: chars2, casei: casei2 });
-                    }
-                }
-                (Some(LiteralBytes { bytes: mut bytes1, casei: casei1 }),
-                      LiteralBytes { bytes: bytes2, casei: casei2 }) => {
-                    if casei1 == casei2 {
-                        bytes1.extend(bytes2);
-                        es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
-                    } else {
-                        es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
-                        es.push(LiteralBytes { bytes: bytes2, casei: casei2 });
-                    }
-                }
-                (Some(e1), e2) => {
-                    es.push(e1);
-                    es.push(e2);
-                }
-            }
-        }
-        fn simp(expr: Expr, recurse: usize, limit: usize) -> Result<Expr> {
-            if recurse > limit {
-                return Err(Error {
-                    pos: 0,
-                    surround: "".to_owned(),
-                    kind: ErrorKind::StackExhausted,
-                });
-            }
-            let simplify = |e| simp(e, recurse + 1, limit);
-            Ok(match expr {
-                Repeat { e, r, greedy } => Repeat {
-                    e: Box::new(try!(simplify(*e))),
-                    r: r,
-                    greedy: greedy,
-                },
-                Group { e, i, name } => {
-                    let e = try!(simplify(*e));
-                    if i.is_none() && name.is_none() && e.can_repeat() {
-                        e
-                    } else {
-                        Group { e: Box::new(e), i: i, name: name }
-                    }
-                }
-                Concat(es) => {
-                    let mut new_es = Vec::with_capacity(es.len());
-                    for e in es {
-                        combine_literals(&mut new_es, try!(simplify(e)));
-                    }
-                    if new_es.len() == 1 {
-                        new_es.pop().unwrap()
-                    } else {
-                        Concat(new_es)
-                    }
-                }
-                Alternate(es) => {
-                    let mut new_es = Vec::with_capacity(es.len());
-                    for e in es {
-                        new_es.push(try!(simplify(e)));
-                    }
-                    Alternate(new_es)
-                }
-                e => e,
-            })
-        }
-        simp(self, 0, nest_limit)
-    }
-
-    /// Returns a set of literal prefixes extracted from this expression.
-    pub fn prefixes(&self) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_prefixes(self);
-        lits
-    }
-
-    /// Returns a set of literal suffixes extracted from this expression.
-    pub fn suffixes(&self) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_suffixes(self);
-        lits
-    }
-
-    /// Returns true if and only if the expression is required to match from
-    /// the beginning of text.
-    pub fn is_anchored_start(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.is_anchored_start()
-            }
-            Group { ref e, .. } => e.is_anchored_start(),
-            Concat(ref es) => es[0].is_anchored_start(),
-            Alternate(ref es) => es.iter().all(|e| e.is_anchored_start()),
-            StartText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression has at least one matchable
-    /// sub-expression that must match the beginning of text.
-    pub fn has_anchored_start(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.has_anchored_start()
-            }
-            Group { ref e, .. } => e.has_anchored_start(),
-            Concat(ref es) => es[0].has_anchored_start(),
-            Alternate(ref es) => es.iter().any(|e| e.has_anchored_start()),
-            StartText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression is required to match at the
-    /// end of the text.
-    pub fn is_anchored_end(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.is_anchored_end()
-            }
-            Group { ref e, .. } => e.is_anchored_end(),
-            Concat(ref es) => es[es.len() - 1].is_anchored_end(),
-            Alternate(ref es) => es.iter().all(|e| e.is_anchored_end()),
-            EndText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression has at least one matchable
-    /// sub-expression that must match the beginning of text.
-    pub fn has_anchored_end(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.has_anchored_end()
-            }
-            Group { ref e, .. } => e.has_anchored_end(),
-            Concat(ref es) => es[es.len() - 1].has_anchored_end(),
-            Alternate(ref es) => es.iter().any(|e| e.has_anchored_end()),
-            EndText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression contains sub-expressions
-    /// that can match arbitrary bytes.
-    pub fn has_bytes(&self) -> bool {
-        match *self {
-            Repeat { ref e, .. } => e.has_bytes(),
-            Group { ref e, .. } => e.has_bytes(),
-            Concat(ref es) => es.iter().any(|e| e.has_bytes()),
-            Alternate(ref es) => es.iter().any(|e| e.has_bytes()),
-            LiteralBytes{..} => true,
-            AnyByte | AnyByteNoNL => true,
-            ClassBytes(_) => true,
-            WordBoundaryAscii | NotWordBoundaryAscii => true,
-            _ => false,
-        }
-    }
-}
-
-impl Deref for CharClass {
-    type Target = Vec<ClassRange>;
-    fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
-}
-
-impl IntoIterator for CharClass {
-    type Item = ClassRange;
-    type IntoIter = vec::IntoIter<ClassRange>;
-    fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
-}
-
-impl<'a> IntoIterator for &'a CharClass {
-    type Item = &'a ClassRange;
-    type IntoIter = slice::Iter<'a, ClassRange>;
-    fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() }
-}
-
-impl CharClass {
-    /// Create a new class from an existing set of ranges.
-    pub fn new(ranges: Vec<ClassRange>) -> CharClass {
-        CharClass { ranges: ranges }
-    }
-
-    /// Create an empty class.
-    fn empty() -> CharClass {
-        CharClass::new(Vec::new())
-    }
-
-    /// Returns true if `c` is matched by this character class.
-    pub fn matches(&self, c: char) -> bool {
-        self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
-    }
-
-    /// Removes the given character from the class if it exists.
-    ///
-    /// Note that this takes `O(n)` time in the number of ranges.
-    pub fn remove(&mut self, c: char) {
-        let mut i = match self.binary_search_by(|r| c.partial_cmp(r).unwrap()) {
-            Ok(i) => i,
-            Err(_) => return,
-        };
-        let mut r = self.ranges.remove(i);
-        if r.start == c {
-            r.start = inc_char(c);
-            if r.start > r.end || c == char::MAX {
-                return;
-            }
-            self.ranges.insert(i, r);
-        } else if r.end == c {
-            r.end = dec_char(c);
-            if r.end < r.start || c == '\x00' {
-                return;
-            }
-            self.ranges.insert(0, r);
-        } else {
-            let (mut r1, mut r2) = (r.clone(), r.clone());
-            r1.end = dec_char(c);
-            if r1.start <= r1.end {
-                self.ranges.insert(i, r1);
-                i += 1;
-            }
-            r2.start = inc_char(c);
-            if r2.start <= r2.end {
-                self.ranges.insert(i, r2);
-            }
-        }
-    }
-
-    /// Create a new empty class from this one.
-    fn to_empty(&self) -> CharClass {
-        CharClass { ranges: Vec::with_capacity(self.len()) }
-    }
-
-    /// Create a byte class from this character class.
-    ///
-    /// Codepoints above 0xFF are removed.
-    fn to_byte_class(self) -> ByteClass {
-        ByteClass::new(
-            self.ranges.into_iter()
-                       .filter_map(|r| r.to_byte_range())
-                       .collect()).canonicalize()
-    }
-
-    /// Merge two classes and canonicalize them.
-    #[cfg(test)]
-    fn merge(mut self, other: CharClass) -> CharClass {
-        self.ranges.extend(other);
-        self.canonicalize()
-    }
-
-    /// Canonicalize any sequence of ranges.
-    ///
-    /// This is responsible for enforcing the canonical format invariants
-    /// as described on the docs for the `CharClass` type.
-    fn canonicalize(mut self) -> CharClass {
-        // TODO: Save some cycles here by checking if already canonicalized.
-        self.ranges.sort();
-        let mut ordered = self.to_empty(); // TODO: Do this in place?
-        for candidate in self {
-            // If the candidate overlaps with an existing range, then it must
-            // be the most recent range added because we process the candidates
-            // in order.
-            if let Some(or) = ordered.ranges.last_mut() {
-                if or.overlapping(candidate) {
-                    *or = or.merge(candidate);
-                    continue;
-                }
-            }
-            ordered.ranges.push(candidate);
-        }
-        ordered
-    }
-
-    /// Calculate the intersection of two canonical character classes.
-    ///
-    /// The returned intersection is canonical.
-    fn intersection(&self, other: &CharClass) -> CharClass {
-        if self.ranges.is_empty() || other.ranges.is_empty() {
-            return CharClass::empty();
-        }
-
-        let mut intersection = CharClass::empty();
-
-        let mut iter_a = self.ranges.iter();
-        let mut iter_b = other.ranges.iter();
-        let mut a = iter_a.next().unwrap();
-        let mut b = iter_b.next().unwrap();
-        loop {
-            if let Some(i) = a.intersection(&b) {
-                intersection.ranges.push(i);
-            }
-
-            // If the range with the smaller end didn't match this time,
-            // it won't ever match, so move on to the next one.
-            let (iter, item) = if a.end < b.end {
-                (&mut iter_a, &mut a)
-            } else {
-                (&mut iter_b, &mut b)
-            };
-            match iter.next() {
-                Some(v) => *item = v,
-                None => break, // no more ranges to check, done
-            }
-        }
-
-        intersection.canonicalize()
-    }
-
-    /// Negates the character class.
-    ///
-    /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
-    /// if and only if `c` does not match `self.negate()`.
-    pub fn negate(mut self) -> CharClass {
-        fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }
-
-        if self.is_empty() {
-            // Inverting an empty range yields all of Unicode.
-            return CharClass {
-                ranges: vec![ClassRange { start: '\x00', end: '\u{10ffff}' }],
-            };
-        }
-        self = self.canonicalize();
-        let mut inv = self.to_empty();
-        if self[0].start > '\x00' {
-            inv.ranges.push(range('\x00', dec_char(self[0].start)));
-        }
-        for win in self.windows(2) {
-            inv.ranges.push(range(inc_char(win[0].end),
-                                  dec_char(win[1].start)));
-        }
-        if self[self.len() - 1].end < char::MAX {
-            inv.ranges.push(range(inc_char(self[self.len() - 1].end),
-                                  char::MAX));
-        }
-        inv
-    }
-
-    /// Apply case folding to this character class.
-    ///
-    /// N.B. Applying case folding to a negated character class probably
-    /// won't produce the expected result. e.g., `(?i)[^x]` really should
-    /// match any character sans `x` and `X`, but if `[^x]` is negated
-    /// before being case folded, you'll end up matching any character.
-    pub fn case_fold(self) -> CharClass {
-        let mut folded = self.to_empty();
-        for r in self {
-            // Applying case folding to a range is expensive because *every*
-            // character needs to be examined. Thus, we avoid that drudgery
-            // if no character in the current range is in our case folding
-            // table.
-            if r.needs_case_folding() {
-                folded.ranges.extend(r.case_fold());
-            }
-            folded.ranges.push(r);
-        }
-        folded.canonicalize()
-    }
-
-    /// Returns the number of characters that match this class.
-    fn num_chars(&self) -> usize {
-        self.ranges.iter()
-            .map(|&r| 1 + (r.end as u32) - (r.start as u32))
-            .fold(0, |acc, len| acc + len)
-            as usize
-    }
-}
-
-impl ClassRange {
-    /// Create a new class range.
-    ///
-    /// If `end < start`, then the two values are swapped so that
-    /// the invariant `start <= end` is preserved.
-    fn new(start: char, end: char) -> ClassRange {
-        if start <= end {
-            ClassRange { start: start, end: end }
-        } else {
-            ClassRange { start: end, end: start }
-        }
-    }
-
-    /// Translate this to a byte class.
-    ///
-    /// If the start codepoint exceeds 0xFF, then this returns `None`.
-    ///
-    /// If the end codepoint exceeds 0xFF, then it is set to 0xFF.
-    fn to_byte_range(self) -> Option<ByteRange> {
-        if self.start > '\u{FF}' {
-            None
-        } else {
-            let s = self.start as u8;
-            let e = min('\u{FF}', self.end) as u8;
-            Some(ByteRange::new(s, e))
-        }
-    }
-
-    /// Create a range of one character.
-    fn one(c: char) -> ClassRange {
-        ClassRange { start: c, end: c }
-    }
-
-    /// Returns true if and only if the two ranges are overlapping. Note that
-    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
-    fn overlapping(self, other: ClassRange) -> bool {
-        max(self.start, other.start) <= inc_char(min(self.end, other.end))
-    }
-
-    /// Returns the intersection of the two ranges if they have common
-    /// characters, `None` otherwise.
-    fn intersection(&self, other: &ClassRange) -> Option<ClassRange> {
-        let start = max(self.start, other.start);
-        let end = min(self.end, other.end);
-        if start <= end {
-            Some(ClassRange::new(start, end))
-        } else {
-            None
-        }
-    }
-
-    /// Creates a new range representing the union of `self` and `other.
-    fn merge(self, other: ClassRange) -> ClassRange {
-        ClassRange {
-            start: min(self.start, other.start),
-            end: max(self.end, other.end),
-        }
-    }
-
-    /// Returns true if and only if this range contains a character that is
-    /// in the case folding table.
-    fn needs_case_folding(self) -> bool {
-        case_folding::C_plus_S_both_table
-        .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
-    }
-
-    /// Apply case folding to this range.
-    ///
-    /// Since case folding might add characters such that the range is no
-    /// longer contiguous, this returns multiple class ranges. They are in
-    /// canonical order.
-    fn case_fold(self) -> Vec<ClassRange> {
-        let table = &case_folding::C_plus_S_both_table;
-        let (s, e) = (self.start as u32, self.end as u32 + 1);
-        let mut start = self.start;
-        let mut end = start;
-        let mut next_case_fold = '\x00';
-        let mut ranges = Vec::with_capacity(10);
-        for mut c in (s..e).filter_map(char::from_u32) {
-            if c >= next_case_fold {
-                c = match simple_case_fold_both_result(c) {
-                    Ok(i) => {
-                        for &(c1, c2) in &table[i..] {
-                            if c1 != c {
-                                break;
-                            }
-                            if c2 != inc_char(end) {
-                                ranges.push(ClassRange::new(start, end));
-                                start = c2;
-                            }
-                            end = c2;
-                        }
-                        continue;
-                    }
-                    Err(i) => {
-                        if i < table.len() {
-                            next_case_fold = table[i].0;
-                        } else {
-                            next_case_fold = '\u{10FFFF}';
-                        }
-                        c
-                    }
-                };
-            }
-            // The fast path. We know this character doesn't have an entry
-            // in the case folding table.
-            if c != inc_char(end) {
-                ranges.push(ClassRange::new(start, end));
-                start = c;
-            }
-            end = c;
-        }
-        ranges.push(ClassRange::new(start, end));
-        ranges
-    }
-}
-
-impl PartialEq<char> for ClassRange {
-    #[inline]
-    fn eq(&self, other: &char) -> bool {
-        self.start <= *other && *other <= self.end
-    }
-}
-
-impl PartialEq<ClassRange> for char {
-    #[inline]
-    fn eq(&self, other: &ClassRange) -> bool {
-        other.eq(self)
-    }
-}
-
-impl PartialOrd<char> for ClassRange {
-    #[inline]
-    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
-        Some(if self == other {
-            Ordering::Equal
-        } else if *other > self.end {
-            Ordering::Greater
-        } else {
-            Ordering::Less
-        })
-    }
-}
-
-impl PartialOrd<ClassRange> for char {
-    #[inline]
-    fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
-        other.partial_cmp(self).map(|o| o.reverse())
-    }
-}
-
-impl ByteClass {
-    /// Create a new class from an existing set of ranges.
-    pub fn new(ranges: Vec<ByteRange>) -> ByteClass {
-        ByteClass { ranges: ranges }
-    }
-
-    /// Returns true if `b` is matched by this byte class.
-    pub fn matches(&self, b: u8) -> bool {
-        self.binary_search_by(|range| b.partial_cmp(range).unwrap()).is_ok()
-    }
-
-    /// Removes the given byte from the class if it exists.
-    ///
-    /// Note that this takes `O(n)` time in the number of ranges.
-    pub fn remove(&mut self, b: u8) {
-        let mut i = match self.binary_search_by(|r| b.partial_cmp(r).unwrap()) {
-            Ok(i) => i,
-            Err(_) => return,
-        };
-        let mut r = self.ranges.remove(i);
-        if r.start == b {
-            r.start = b.saturating_add(1);
-            if r.start > r.end || b == u8::MAX {
-                return;
-            }
-            self.ranges.insert(i, r);
-        } else if r.end == b {
-            r.end = b.saturating_sub(1);
-            if r.end < r.start || b == b'\x00' {
-                return;
-            }
-            self.ranges.insert(0, r);
-        } else {
-            let (mut r1, mut r2) = (r.clone(), r.clone());
-            r1.end = b.saturating_sub(1);
-            if r1.start <= r1.end {
-                self.ranges.insert(i, r1);
-                i += 1;
-            }
-            r2.start = b.saturating_add(1);
-            if r2.start <= r2.end {
-                self.ranges.insert(i, r2);
-            }
-        }
-    }
-
-    /// Create a new empty class from this one.
-    fn to_empty(&self) -> ByteClass {
-        ByteClass { ranges: Vec::with_capacity(self.len()) }
-    }
-
-    /// Canonicalze any sequence of ranges.
-    ///
-    /// This is responsible for enforcing the canonical format invariants
-    /// as described on the docs for the `ByteClass` type.
-    fn canonicalize(mut self) -> ByteClass {
-        // TODO: Save some cycles here by checking if already canonicalized.
-        self.ranges.sort();
-        let mut ordered = self.to_empty(); // TODO: Do this in place?
-        for candidate in self {
-            // If the candidate overlaps with an existing range, then it must
-            // be the most recent range added because we process the candidates
-            // in order.
-            if let Some(or) = ordered.ranges.last_mut() {
-                if or.overlapping(candidate) {
-                    *or = or.merge(candidate);
-                    continue;
-                }
-            }
-            ordered.ranges.push(candidate);
-        }
-        ordered
-    }
-
-    /// Negates the byte class.
-    ///
-    /// For all `b` where `b` is a byte, `b` matches `self` if and only if `b`
-    /// does not match `self.negate()`.
-    pub fn negate(mut self) -> ByteClass {
-        fn range(s: u8, e: u8) -> ByteRange { ByteRange::new(s, e) }
-
-        if self.is_empty() {
-            // Inverting an empty range yields all bytes.
-            return ByteClass {
-                ranges: vec![ByteRange { start: b'\x00', end: b'\xff' }],
-            };
-        }
-        self = self.canonicalize();
-        let mut inv = self.to_empty();
-        if self[0].start > b'\x00' {
-            inv.ranges.push(range(b'\x00', self[0].start.saturating_sub(1)));
-        }
-        for win in self.windows(2) {
-            inv.ranges.push(range(win[0].end.saturating_add(1),
-                                  win[1].start.saturating_sub(1)));
-        }
-        if self[self.len() - 1].end < u8::MAX {
-            inv.ranges.push(range(self[self.len() - 1].end.saturating_add(1),
-                                  u8::MAX));
-        }
-        inv
-    }
-
-    /// Apply case folding to this byte class.
-    ///
-    /// This assumes that the bytes in the ranges are ASCII compatible.
-    ///
-    /// N.B. Applying case folding to a negated character class probably
-    /// won't produce the expected result. e.g., `(?i)[^x]` really should
-    /// match any character sans `x` and `X`, but if `[^x]` is negated
-    /// before being case folded, you'll end up matching any character.
-    pub fn case_fold(self) -> ByteClass {
-        let mut folded = self.to_empty();
-        for r in self {
-            folded.ranges.extend(r.case_fold());
-        }
-        folded.canonicalize()
-    }
-
-    /// Returns the number of bytes that match this class.
-    fn num_bytes(&self) -> usize {
-        self.ranges.iter()
-            .map(|&r| 1 + (r.end as u32) - (r.start as u32))
-            .fold(0, |acc, len| acc + len)
-            as usize
-    }
-}
-
-impl ByteRange {
-    /// Create a new class range.
-    ///
-    /// If `end < start`, then the two values are swapped so that
-    /// the invariant `start <= end` is preserved.
-    fn new(start: u8, end: u8) -> ByteRange {
-        if start <= end {
-            ByteRange { start: start, end: end }
-        } else {
-            ByteRange { start: end, end: start }
-        }
-    }
-
-    /// Returns true if and only if the two ranges are overlapping. Note that
-    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
-    fn overlapping(self, other: ByteRange) -> bool {
-        max(self.start, other.start)
-        <= min(self.end, other.end).saturating_add(1)
-    }
-
-    /// Returns true if and only if the intersection of self and other is non
-    /// empty.
-    fn is_intersect_empty(self, other: ByteRange) -> bool {
-        max(self.start, other.start) > min(self.end, other.end)
-    }
-
-    /// Creates a new range representing the union of `self` and `other.
-    fn merge(self, other: ByteRange) -> ByteRange {
-        ByteRange {
-            start: min(self.start, other.start),
-            end: max(self.end, other.end),
-        }
-    }
-
-    /// Apply case folding to this range.
-    ///
-    /// Since case folding might add bytes such that the range is no
-    /// longer contiguous, this returns multiple byte ranges.
-    ///
-    /// This assumes that the bytes in this range are ASCII compatible.
-    fn case_fold(self) -> Vec<ByteRange> {
-        // So much easier than Unicode case folding!
-        let mut ranges = vec![self];
-        if !ByteRange::new(b'a', b'z').is_intersect_empty(self) {
-            let lower = max(self.start, b'a');
-            let upper = min(self.end, b'z');
-            ranges.push(ByteRange::new(lower - 32, upper - 32));
-        }
-        if !ByteRange::new(b'A', b'Z').is_intersect_empty(self) {
-            let lower = max(self.start, b'A');
-            let upper = min(self.end, b'Z');
-            ranges.push(ByteRange::new(lower + 32, upper + 32));
-        }
-        ranges
-    }
-}
-
-impl Deref for ByteClass {
-    type Target = Vec<ByteRange>;
-    fn deref(&self) -> &Vec<ByteRange> { &self.ranges }
-}
 
-impl IntoIterator for ByteClass {
-    type Item = ByteRange;
-    type IntoIter = vec::IntoIter<ByteRange>;
-    fn into_iter(self) -> vec::IntoIter<ByteRange> { self.ranges.into_iter() }
-}
-
-impl<'a> IntoIterator for &'a ByteClass {
-    type Item = &'a ByteRange;
-    type IntoIter = slice::Iter<'a, ByteRange>;
-    fn into_iter(self) -> slice::Iter<'a, ByteRange> { self.iter() }
-}
-
-impl PartialEq<u8> for ByteRange {
-    #[inline]
-    fn eq(&self, other: &u8) -> bool {
-        self.start <= *other && *other <= self.end
-    }
-}
-
-impl PartialEq<ByteRange> for u8 {
-    #[inline]
-    fn eq(&self, other: &ByteRange) -> bool {
-        other.eq(self)
-    }
-}
 
-impl PartialOrd<u8> for ByteRange {
-    #[inline]
-    fn partial_cmp(&self, other: &u8) -> Option<Ordering> {
-        Some(if self == other {
-            Ordering::Equal
-        } else if *other > self.end {
-            Ordering::Greater
-        } else {
-            Ordering::Less
-        })
-    }
-}
+# Concrete syntax supported
 
-impl PartialOrd<ByteRange> for u8 {
-    #[inline]
-    fn partial_cmp(&self, other: &ByteRange) -> Option<Ordering> {
-        other.partial_cmp(self).map(|o| o.reverse())
-    }
-}
+The concrete syntax is documented as part of the public API of the
+[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax).
 
-/// This implementation of `Display` will write a regular expression from the
-/// syntax tree. It does not write the original string parsed.
-impl fmt::Display for Expr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Empty => write!(f, ""),
-            Literal { ref chars, casei } => {
-                if casei {
-                    try!(write!(f, "(?iu:"));
-                } else {
-                    try!(write!(f, "(?u:"));
-                }
-                for &c in chars {
-                    try!(write!(f, "{}", quote_char(c)));
-                }
-                try!(write!(f, ")"));
-                Ok(())
-            }
-            LiteralBytes { ref bytes, casei } => {
-                if casei {
-                    try!(write!(f, "(?i-u:"));
-                } else {
-                    try!(write!(f, "(?-u:"));
-                }
-                for &b in bytes {
-                    try!(write!(f, "{}", quote_byte(b)));
-                }
-                try!(write!(f, ")"));
-                Ok(())
-            }
-            AnyChar => write!(f, "(?su:.)"),
-            AnyCharNoNL => write!(f, "(?u:.)"),
-            AnyByte => write!(f, "(?s-u:.)"),
-            AnyByteNoNL => write!(f, "(?-u:.)"),
-            Class(ref cls) => write!(f, "{}", cls),
-            ClassBytes(ref cls) => write!(f, "{}", cls),
-            StartLine => write!(f, "(?m:^)"),
-            EndLine => write!(f, "(?m:$)"),
-            StartText => write!(f, r"^"),
-            EndText => write!(f, r"$"),
-            WordBoundary => write!(f, r"(?u:\b)"),
-            NotWordBoundary => write!(f, r"(?u:\B)"),
-            WordBoundaryAscii => write!(f, r"(?-u:\b)"),
-            NotWordBoundaryAscii => write!(f, r"(?-u:\B)"),
-            Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
-            Group { ref e, name: None, .. } => write!(f, "({})", e),
-            Group { ref e, name: Some(ref n), .. } => {
-                write!(f, "(?P<{}>{})", n, e)
-            }
-            Repeat { ref e, r, greedy } => {
-                match &**e {
-                    &Literal { ref chars, .. } if chars.len() > 1 => {
-                        try!(write!(f, "(?:{}){}", e, r))
-                    }
-                    _ => try!(write!(f, "{}{}", e, r)),
-                }
-                if !greedy { try!(write!(f, "?")); }
-                Ok(())
-            }
-            Concat(ref es) => {
-                for e in es {
-                    try!(write!(f, "{}", e));
-                }
-                Ok(())
-            }
-            Alternate(ref es) => {
-                for (i, e) in es.iter().enumerate() {
-                    if i > 0 { try!(write!(f, "|")); }
-                    try!(write!(f, "{}", e));
-                }
-                Ok(())
-            }
-        }
-    }
-}
-
-impl fmt::Display for Repeater {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            ZeroOrOne => write!(f, "?"),
-            ZeroOrMore => write!(f, "*"),
-            OneOrMore => write!(f, "+"),
-            Range { min: s, max: None } => write!(f, "{{{},}}", s),
-            Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s),
-            Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e),
-        }
-    }
-}
-
-impl fmt::Display for CharClass {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        try!(write!(f, "(?u:["));
-        for range in self.iter() {
-            if range.start == '-' || range.end == '-' {
-                try!(write!(f, "-"));
-                break;
-            }
-        }
-        for range in self.iter() {
-            let mut range = *range;
-            if range.start == '-' {
-                range.start = ((range.start as u8) + 1) as char;
-            }
-            if range.end == '-' {
-                range.end = ((range.end as u8) - 1) as char;
-            }
-            if range.start > range.end {
-                continue;
-            }
-            try!(write!(f, "{}", range));
-        }
-        try!(write!(f, "])"));
-        Ok(())
-    }
-}
 
-impl fmt::Display for ClassRange {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
-    }
-}
+# Input safety
 
-impl fmt::Display for ByteClass {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        try!(write!(f, "(?-u:["));
-        for range in self.iter() {
-            if range.start == b'-' || range.end == b'-' {
-                try!(write!(f, "-"));
-                break;
-            }
-        }
-        for range in self.iter() {
-            let mut range = *range;
-            if range.start == b'-' {
-                range.start += 1;
-            }
-            if range.end == b'-' {
-                range.start -= 1;
-            }
-            if range.start > range.end {
-                continue;
-            }
-            try!(write!(f, "{}", range));
-        }
-        try!(write!(f, "])"));
-        Ok(())
-    }
-}
+A key feature of this library is that it is safe to use with end user facing
+input. This plays a significant role in the internal implementation. In
+particular:
 
-impl fmt::Display for ByteRange {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}-{}", quote_byte(self.start), quote_byte(self.end))
-    }
-}
+1. Parsers provide a `nest_limit` option that permits callers to control how
+   deeply nested a regular expression is allowed to be. This makes it possible
+   to do case analysis over an `Ast` or an `Hir` using recursion without
+   worrying about stack overflow.
+2. Since relying on a particular stack size is brittle, this crate goes to
+   great lengths to ensure that all interactions with both the `Ast` and the
+   `Hir` do not use recursion. Namely, they use constant stack space and heap
+   space proportional to the size of the original pattern string (in bytes).
+   This includes the types' corresponding destructors. (One exception to this
+   is literal extraction, but this will eventually get fixed.)
 
-/// An alias for computations that can return a `Error`.
-pub type Result<T> = ::std::result::Result<T, Error>;
 
-/// A parse error.
-///
-/// This includes details about the specific type of error and a rough
-/// approximation of where it occurred.
-#[derive(Clone, Debug, PartialEq)]
-pub struct Error {
-    pos: usize,
-    surround: String,
-    kind: ErrorKind,
-}
+# Error reporting
 
-/// The specific type of parse error that can occur.
-#[derive(Clone, Debug, PartialEq)]
-pub enum ErrorKind {
-    /// A negation symbol is used twice in flag settings.
-    /// e.g., `(?-i-s)`.
-    DoubleFlagNegation,
-    /// The same capture name was used more than once.
-    /// e.g., `(?P<a>.)(?P<a>.)`.
-    DuplicateCaptureName(String),
-    /// An alternate is empty. e.g., `(|a)`.
-    EmptyAlternate,
-    /// A capture group name is empty. e.g., `(?P<>a)`.
-    EmptyCaptureName,
-    /// A negation symbol was not proceded by any flags. e.g., `(?i-)`.
-    EmptyFlagNegation,
-    /// A group is empty. e.g., `()`.
-    EmptyGroup,
-    /// An invalid number was used in a counted repetition. e.g., `a{b}`.
-    InvalidBase10(String),
-    /// An invalid hexadecimal number was used in an escape sequence.
-    /// e.g., `\xAG`.
-    InvalidBase16(String),
-    /// An invalid capture name was used. e.g., `(?P<0a>b)`.
-    InvalidCaptureName(String),
-    /// An invalid class range was givien. Specifically, when the start of the
-    /// range is greater than the end. e.g., `[z-a]`.
-    InvalidClassRange {
-        /// The first character specified in the range.
-        start: char,
-        /// The second character specified in the range.
-        end: char,
-    },
-    /// An escape sequence was used in a character class where it is not
-    /// allowed. e.g., `[a-\pN]` or `[\A]`.
-    InvalidClassEscape(Expr),
-    /// An invalid counted repetition min/max was given. e.g., `a{2,1}`.
-    InvalidRepeatRange {
-        /// The first number specified in the repetition.
-        min: u32,
-        /// The second number specified in the repetition.
-        max: u32,
-    },
-    /// An invalid Unicode scalar value was used in a long hexadecimal
-    /// sequence. e.g., `\x{D800}`.
-    InvalidScalarValue(u32),
-    /// An empty counted repetition operator. e.g., `a{}`.
-    MissingBase10,
-    /// A repetition operator was not applied to an expression. e.g., `*`.
-    RepeaterExpectsExpr,
-    /// A repetition operator was applied to an expression that cannot be
-    /// repeated. e.g., `a+*` or `a|*`.
-    RepeaterUnexpectedExpr(Expr),
-    /// A capture group name that is never closed. e.g., `(?P<a`.
-    UnclosedCaptureName(String),
-    /// An unclosed hexadecimal literal. e.g., `\x{a`.
-    UnclosedHex,
-    /// An unclosed parenthesis. e.g., `(a`.
-    UnclosedParen,
-    /// An unclosed counted repetition operator. e.g., `a{2`.
-    UnclosedRepeat,
-    /// An unclosed named Unicode class. e.g., `\p{Yi`.
-    UnclosedUnicodeName,
-    /// Saw end of regex before class was closed. e.g., `[a`.
-    UnexpectedClassEof,
-    /// Saw end of regex before escape sequence was closed. e.g., `\`.
-    UnexpectedEscapeEof,
-    /// Saw end of regex before flags were closed. e.g., `(?i`.
-    UnexpectedFlagEof,
-    /// Saw end of regex before two hexadecimal digits were seen. e.g., `\xA`.
-    UnexpectedTwoDigitHexEof,
-    /// Unopened parenthesis. e.g., `)`.
-    UnopenedParen,
-    /// Unrecognized escape sequence. e.g., `\q`.
-    UnrecognizedEscape(char),
-    /// Unrecognized flag. e.g., `(?a)`.
-    UnrecognizedFlag(char),
-    /// Unrecognized named Unicode class. e.g., `\p{Foo}`.
-    UnrecognizedUnicodeClass(String),
-    /// Indicates that the regex uses too much nesting.
-    ///
-    /// (N.B. This error exists because traversing the Expr is recursive and
-    /// an explicit heap allocated stack is not (yet?) used. Regardless, some
-    /// sort of limit must be applied to avoid unbounded memory growth.
-    StackExhausted,
-    /// A disallowed flag was found (e.g., `u`).
-    FlagNotAllowed(char),
-    /// A Unicode class was used when the Unicode (`u`) flag was disabled.
-    UnicodeNotAllowed,
-    /// InvalidUtf8 indicates that the expression may match non-UTF-8 bytes.
-    /// This never returned if the parser is permitted to allow expressions
-    /// that match arbitrary bytes.
-    InvalidUtf8,
-    /// A character class was constructed such that it is empty.
-    /// e.g., `[^\d\D]`.
-    EmptyClass,
-    /// Indicates that unsupported notation was used in a character class.
-    ///
-    /// The char in this error corresponds to the illegal character.
-    ///
-    /// The intent of this error is to carve a path to support set notation
-    /// as described in UTS#18 RL1.3. We do this by rejecting regexes that
-    /// would use the notation.
-    ///
-    /// The work around for end users is to escape the character included in
-    /// this error message.
-    UnsupportedClassChar(char),
-    /// Hints that destructuring should not be exhaustive.
-    ///
-    /// This enum may grow additional variants, so this makes sure clients
-    /// don't count on exhaustive matching. (Otherwise, adding a new variant
-    /// could break existing code.)
-    #[doc(hidden)]
-    __Nonexhaustive,
-}
+The `Display` implementations on all `Error` types exposed in this library
+provide nice human readable errors that are suitable for showing to end users
+in a monospace font.
 
-impl Error {
-    /// Returns an approximate *character* offset at which the error occurred.
-    ///
-    /// The character offset may be equal to the number of characters in the
-    /// string, in which case it should be interpreted as pointing to the end
-    /// of the regex.
-    pub fn position(&self) -> usize {
-        self.pos
-    }
 
-    /// Returns the type of the regex parse error.
-    pub fn kind(&self) -> &ErrorKind {
-        &self.kind
-    }
-}
+# Literal extraction
 
-impl ErrorKind {
-    fn description(&self) -> &str {
-        use ErrorKind::*;
-        match *self {
-            DoubleFlagNegation => "double flag negation",
-            DuplicateCaptureName(_) => "duplicate capture name",
-            EmptyAlternate => "empty alternate",
-            EmptyCaptureName => "empty capture name",
-            EmptyFlagNegation => "flag negation without any flags",
-            EmptyGroup => "empty group (e.g., '()')",
-            InvalidBase10(_) => "invalid base 10 number",
-            InvalidBase16(_) => "invalid base 16 number",
-            InvalidCaptureName(_) => "invalid capture name",
-            InvalidClassRange{..} => "invalid character class range",
-            InvalidClassEscape(_) => "invalid escape sequence in class",
-            InvalidRepeatRange{..} => "invalid counted repetition range",
-            InvalidScalarValue(_) => "invalid Unicode scalar value",
-            MissingBase10 => "missing count in repetition operator",
-            RepeaterExpectsExpr => "repetition operator missing expression",
-            RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
-            UnclosedCaptureName(_) => "unclosed capture group name",
-            UnclosedHex => "unclosed hexadecimal literal",
-            UnclosedParen => "unclosed parenthesis",
-            UnclosedRepeat => "unclosed counted repetition operator",
-            UnclosedUnicodeName => "unclosed Unicode class literal",
-            UnexpectedClassEof => "unexpected EOF in character class",
-            UnexpectedEscapeEof => "unexpected EOF in escape sequence",
-            UnexpectedFlagEof => "unexpected EOF in flags",
-            UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
-            UnopenedParen => "unopened parenthesis",
-            UnrecognizedEscape(_) => "unrecognized escape sequence",
-            UnrecognizedFlag(_) => "unrecognized flag",
-            UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
-            StackExhausted => "stack exhausted, too much nesting",
-            FlagNotAllowed(_) => "flag not allowed",
-            UnicodeNotAllowed => "Unicode features not allowed",
-            InvalidUtf8 => "matching arbitrary bytes is not allowed",
-            EmptyClass => "empty character class",
-            UnsupportedClassChar(_) => "unsupported class notation",
-            __Nonexhaustive => unreachable!(),
-        }
-    }
-}
+This crate provides limited support for
+[literal extraction from `Hir` values](hir/literal/struct.Literals.html).
+Be warned that literal extraction currently uses recursion, and therefore
+uses stack space proportional to the size of the `Hir`.
 
-impl ::std::error::Error for Error {
-    fn description(&self) -> &str {
-        self.kind.description()
-    }
-}
+The purpose of literal extraction is to speed up searches. That is, if you
+know a regular expression must match a prefix or suffix literal, then it is
+often quicker to search for instances of that literal, and then confirm or deny
+the match using the full regular expression engine. These optimizations are
+done automatically in the `regex` crate.
+*/
 
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        if let ErrorKind::StackExhausted = self.kind {
-            write!(f, "Error parsing regex: {}", self.kind)
-        } else {
-            write!(
-                f, "Error parsing regex near '{}' at character offset {}: {}",
-                self.surround, self.pos, self.kind)
-        }
-    }
-}
+#![deny(missing_docs)]
 
-impl fmt::Display for ErrorKind {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        use ErrorKind::*;
-        match *self {
-            DoubleFlagNegation =>
-                write!(f, "Only one negation symbol is allowed in flags."),
-            DuplicateCaptureName(ref s) =>
-                write!(f, "Capture name '{}' is used more than once.", s),
-            EmptyAlternate =>
-                write!(f, "Alternations cannot be empty."),
-            EmptyCaptureName =>
-                write!(f, "Capture names cannot be empty."),
-            EmptyFlagNegation =>
-                write!(f, "Flag negation requires setting at least one flag."),
-            EmptyGroup =>
-                write!(f, "Empty regex groups (e.g., '()') are not allowed."),
-            InvalidBase10(ref s) =>
-                write!(f, "Not a valid base 10 number: '{}'", s),
-            InvalidBase16(ref s) =>
-                write!(f, "Not a valid base 16 number: '{}'", s),
-            InvalidCaptureName(ref s) =>
-                write!(f, "Invalid capture name: '{}'. Capture names must \
-                           consist of [_a-zA-Z0-9] and are not allowed to \
-                           start with with a number.", s),
-            InvalidClassRange { start, end } =>
-                write!(f, "Invalid character class range '{}-{}'. \
-                           Character class ranges must start with the smaller \
-                           character, but {} > {}", start, end, start, end),
-            InvalidClassEscape(_) =>
-                write!(f, "Invalid escape sequence in character class."),
-            InvalidRepeatRange { min, max } =>
-                write!(f, "Invalid counted repetition range: {{{}, {}}}. \
-                           Counted repetition ranges must start with the \
-                           minimum, but {} > {}", min, max, min, max),
-            InvalidScalarValue(c) =>
-                write!(f, "Number does not correspond to a Unicode scalar \
-                           value: '{}'.", c),
-            MissingBase10 =>
-                write!(f, "Missing maximum in counted repetition operator."),
-            RepeaterExpectsExpr =>
-                write!(f, "Missing expression for repetition operator."),
-            RepeaterUnexpectedExpr(_) =>
-                write!(f, "Invalid application of repetition operator."),
-            UnclosedCaptureName(ref s) =>
-                write!(f, "Capture name group for '{}' is not closed. \
-                           (Missing a '>'.)", s),
-            UnclosedHex =>
-                write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
-            UnclosedParen =>
-                write!(f, "Unclosed parenthesis."),
-            UnclosedRepeat =>
-                write!(f, "Unclosed counted repetition (missing a '}}')."),
-            UnclosedUnicodeName =>
-                write!(f, "Unclosed Unicode literal (missing a '}}')."),
-            UnexpectedClassEof =>
-                write!(f, "Character class was not closed before the end of \
-                           the regex (missing a ']')."),
-            UnexpectedEscapeEof =>
-                write!(f, "Started an escape sequence that didn't finish \
-                           before the end of the regex."),
-            UnexpectedFlagEof =>
-                write!(f, "Inline flag settings was not closed before the end \
-                           of the regex (missing a ')' or ':')."),
-            UnexpectedTwoDigitHexEof =>
-                write!(f, "Unexpected end of two digit hexadecimal literal."),
-            UnopenedParen =>
-                write!(f, "Unopened parenthesis."),
-            UnrecognizedEscape(c) =>
-                write!(f, "Unrecognized escape sequence: '\\{}'.", c),
-            UnrecognizedFlag(c) =>
-                write!(f, "Unrecognized flag: '{}'. \
-                           (Allowed flags: i, m, s, U, u, x.)", c),
-            UnrecognizedUnicodeClass(ref s) =>
-                write!(f, "Unrecognized Unicode class name: '{}'.", s),
-            StackExhausted =>
-                write!(f, "Exhausted space required to parse regex with too \
-                           much nesting."),
-            FlagNotAllowed(flag) =>
-                write!(f, "Use of the flag '{}' is not allowed.", flag),
-            UnicodeNotAllowed =>
-                write!(f, "Unicode features are not allowed when the Unicode \
-                           (u) flag is not set."),
-            InvalidUtf8 =>
-                write!(f, "Matching arbitrary bytes is not allowed."),
-            EmptyClass =>
-                write!(f, "Empty character classes are not allowed."),
-            UnsupportedClassChar(c) =>
-                write!(f, "Use of unescaped '{}' in character class is \
-                           not allowed.", c),
-            __Nonexhaustive => unreachable!(),
-        }
-    }
-}
+extern crate ucd_util;
 
-/// The result of binary search on the simple case folding table.
-///
-/// Note that this binary search is done on the "both" table, such that
-/// the index returned corresponds to the *first* location of `c1` in the
-/// table. The table can then be scanned linearly starting from the position
-/// returned to find other case mappings for `c1`.
-fn simple_case_fold_both_result(c1: char) -> result::Result<usize, usize> {
-    let table = &case_folding::C_plus_S_both_table;
-    let i = binary_search(table, |&(c2, _)| c1 <= c2);
-    if i >= table.len() || table[i].0 != c1 {
-        Err(i)
-    } else {
-        Ok(i)
-    }
-}
+pub use error::{Error, Result};
+pub use parser::{Parser, ParserBuilder};
 
-/// Binary search to find first element such that `pred(T) == true`.
-///
-/// Assumes that if `pred(xs[i]) == true` then `pred(xs[i+1]) == true`.
-///
-/// If all elements yield `pred(T) == false`, then `xs.len()` is returned.
-fn binary_search<T, F>(xs: &[T], mut pred: F) -> usize
-        where F: FnMut(&T) -> bool {
-    let (mut left, mut right) = (0, xs.len());
-    while left < right {
-        let mid = (left + right) / 2;
-        if pred(&xs[mid]) {
-            right = mid;
-        } else {
-            left = mid + 1;
-        }
-    }
-    left
-}
+pub mod ast;
+mod either;
+mod error;
+pub mod hir;
+mod parser;
+mod unicode;
+mod unicode_tables;
 
 /// Escapes all regular expression meta characters in `text`.
 ///
@@ -1681,543 +124,99 @@ fn binary_search<T, F>(xs: &[T], mut pred: F) -> usize
 /// expression.
 pub fn escape(text: &str) -> String {
     let mut quoted = String::with_capacity(text.len());
-    for c in text.chars() {
-        if parser::is_punct(c) {
-            quoted.push('\\');
-        }
-        quoted.push(c);
-    }
+    escape_into(text, &mut quoted);
     quoted
 }
 
-fn quote_char(c: char) -> String {
-    let mut s = String::new();
-    if parser::is_punct(c) {
-        s.push('\\');
-    }
-    s.push(c);
-    s
-}
-
-fn quote_byte(b: u8) -> String {
-    if parser::is_punct(b as char) || b == b'\'' || b == b'"' {
-        quote_char(b as char)
-    } else {
-        let escaped: Vec<u8> = ascii::escape_default(b).collect();
-        String::from_utf8(escaped).unwrap()
+/// Escapes all meta characters in `text` and writes the result into `buf`.
+///
+/// This will append escape characters into the given buffer. The characters
+/// that are appended are safe to use as a literal in a regular expression.
+pub fn escape_into(text: &str, buf: &mut String) {
+    for c in text.chars() {
+        if is_meta_character(c) {
+            buf.push('\\');
+        }
+        buf.push(c);
     }
 }
 
-fn inc_char(c: char) -> char {
+/// Returns true if the given character has significance in a regex.
+///
+/// These are the only characters that are allowed to be escaped, with one
+/// exception: an ASCII space character may be escaped when extended mode (with
+/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
+/// `false`.
+///
+/// Note that the set of characters for which this function returns `true` or
+/// `false` is fixed and won't change in a semver compatible release.
+pub fn is_meta_character(c: char) -> bool {
     match c {
-        char::MAX => char::MAX,
-        '\u{D7FF}' => '\u{E000}',
-        c => char::from_u32(c as u32 + 1).unwrap(),
+        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
+        '[' | ']' | '{' | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
+        _ => false,
     }
 }
 
-fn dec_char(c: char) -> char {
-    match c {
-        '\x00' => '\x00',
-        '\u{E000}' => '\u{D7FF}',
-        c => char::from_u32(c as u32 - 1).unwrap(),
-    }
-}
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+pub fn is_word_character(c: char) -> bool {
+    use std::cmp::Ordering;
+    use unicode_tables::perl_word::PERL_WORD;
 
-/// Returns true if and only if `c` is a word character.
-#[doc(hidden)]
-pub fn is_word_char(c: char) -> bool {
-    match c {
-        '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z'  => true,
-        _ => ::unicode::regex::PERLW.binary_search_by(|&(start, end)| {
-            if c >= start && c <= end {
+    if c <= 0x7F as char && is_word_byte(c as u8) {
+        return true;
+    }
+    PERL_WORD
+        .binary_search_by(|&(start, end)| {
+            if start <= c && c <= end {
                 Ordering::Equal
             } else if start > c {
                 Ordering::Greater
             } else {
                 Ordering::Less
             }
-        }).is_ok(),
-    }
+        }).is_ok()
 }
 
-/// Returns true if and only if `c` is an ASCII word byte.
-#[doc(hidden)]
-pub fn is_word_byte(b: u8) -> bool {
-    match b {
+/// Returns true if and only if the given character is an ASCII word character.
+///
+/// An ASCII word character is defined by the following character class:
+/// `[_0-9a-zA-Z]`.
+pub fn is_word_byte(c: u8) -> bool {
+    match c {
         b'_' | b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z'  => true,
         _ => false,
     }
 }
 
-#[cfg(test)]
-mod properties;
-
 #[cfg(test)]
 mod tests {
-    use {CharClass, ClassRange, ByteClass, ByteRange, Expr};
-
-    fn class(ranges: &[(char, char)]) -> CharClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
-        CharClass::new(ranges)
-    }
-
-    fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
-        ByteClass::new(ranges)
-    }
-
-    fn e(re: &str) -> Expr { Expr::parse(re).unwrap() }
+    use super::*;
 
     #[test]
-    fn stack_exhaustion() {
-        use std::iter::repeat;
-
-        let open: String = repeat('(').take(200).collect();
-        let close: String = repeat(')').take(200).collect();
-        assert!(Expr::parse(&format!("{}a{}", open, close)).is_ok());
-
-        let open: String = repeat('(').take(200 + 1).collect();
-        let close: String = repeat(')').take(200 + 1).collect();
-        assert!(Expr::parse(&format!("{}a{}", open, close)).is_err());
+    fn escape_meta() {
+        assert_eq!(
+            escape(r"\.+*?()|[]{}^$#&-~"),
+            r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string(),
+        );
     }
 
     #[test]
-    fn anchored_start() {
-        assert!(e("^a").is_anchored_start());
-        assert!(e("(^a)").is_anchored_start());
-        assert!(e("^a|^b").is_anchored_start());
-        assert!(e("(^a)|(^b)").is_anchored_start());
-        assert!(e("(^(a|b))").is_anchored_start());
-
-        assert!(!e("^a|b").is_anchored_start());
-        assert!(!e("a|^b").is_anchored_start());
-    }
-
-    #[test]
-    fn anchored_end() {
-        assert!(e("a$").is_anchored_end());
-        assert!(e("(a$)").is_anchored_end());
-        assert!(e("a$|b$").is_anchored_end());
-        assert!(e("(a$)|(b$)").is_anchored_end());
-        assert!(e("((a|b)$)").is_anchored_end());
-
-        assert!(!e("a$|b").is_anchored_end());
-        assert!(!e("a|b$").is_anchored_end());
-    }
-
-    #[test]
-    fn class_canon_no_change() {
-        let cls = class(&[('a', 'c'), ('x', 'z')]);
-        assert_eq!(cls.clone().canonicalize(), cls);
-    }
-
-    #[test]
-    fn class_canon_unordered() {
-        let cls = class(&[('x', 'z'), ('a', 'c')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('a', 'c'), ('x', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap() {
-        let cls = class(&[('x', 'z'), ('w', 'y')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('w', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap_many() {
-        let cls = class(&[
-            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
-            ('m', 'p'), ('l', 's'),
-        ]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('a', 'j'), ('l', 's'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap_boundary() {
-        let cls = class(&[('x', 'z'), ('u', 'w')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('u', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_extreme_edge_case() {
-        let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('\x00', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_singles() {
-        let cls = class(&[('a', 'a'), ('b', 'b')]);
-        assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
-    }
-
-    #[test]
-    fn class_negate_single() {
-        let cls = class(&[('a', 'a')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_singles() {
-        let cls = class(&[('a', 'a'), ('b', 'b')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_multiples() {
-        let cls = class(&[('a', 'c'), ('x', 'z')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_min_scalar() {
-        let cls = class(&[('\x00', 'a')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x62', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_max_scalar() {
-        let cls = class(&[('a', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_everything() {
-        let cls = class(&[('\x00', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[]));
-    }
-
-    #[test]
-    fn class_negate_everything_sans_one() {
-        let cls = class(&[
-            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
-        ]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{10FFFE}', '\u{10FFFE}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_min() {
-        let cls = class(&[('\x00', '\u{D7FF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{E000}', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_min_edge() {
-        let cls = class(&[('\x00', '\u{D7FE}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{D7FF}', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_max() {
-        let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\u{D7FF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_max_edge() {
-        let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\u{E000}'),
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_empty() {
-        let cls1 = class(&[]);
-        let cls2 = class(&[('a', 'a')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_single_equal() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('a', 'a')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'a')]));
-    }
-
-    #[test]
-    fn class_intersection_single_unequal() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('b', 'b')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_single_in_other() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('a', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'a')]));
-    }
-
-    #[test]
-    fn class_intersection_range_in_other() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('a', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'b')]));
-    }
-
-    #[test]
-    fn class_intersection_range_intersection() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('b', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'b')]));
-    }
-
-    #[test]
-    fn class_intersection_only_adjacent() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('c', 'd')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_range_subset() {
-        let cls1 = class(&[('b', 'c')]);
-        let cls2 = class(&[('a', 'd')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'c')]));
-    }
-
-    #[test]
-    fn class_intersection_many_ranges_in_one_big() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('a', 'h')]);
-        assert_intersection(cls1, cls2, class(&[
-            ('a', 'b'), ('d', 'e'), ('g', 'h')
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_many_ranges_same() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        assert_intersection(cls1, cls2, class(&[
-            ('a', 'b'), ('d', 'e'), ('g', 'h')
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_multiple_non_intersecting() {
-        let cls1 = class(&[('a', 'b'), ('g', 'h')]);
-        let cls2 = class(&[('d', 'e'), ('k', 'l')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_non_intersecting_then_intersecting() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('h', 'h')]);
-        assert_intersection(cls1, cls2, class(&[('h', 'h')]));
-    }
-
-    #[test]
-    fn class_intersection_adjacent_alternating() {
-        let cls1 = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
-        let cls2 = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_overlapping_alternating() {
-        let cls1 = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
-        let cls2 = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'f')]));
-    }
-
-    #[test]
-    fn class_canon_overlap_many_case_fold() {
-        let cls = class(&[
-            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
-            ('M', 'P'), ('L', 'S'), ('c', 'f'),
-        ]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'J'), ('L', 'S'),
-            ('a', 'j'), ('l', 's'),
-            ('\u{17F}', '\u{17F}'),
-        ]));
-
-        let cls = bclass(&[
-            (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
-            (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
-        ]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'J'), (b'L', b'S'),
-            (b'a', b'j'), (b'l', b's'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_az() {
-        let cls = class(&[('A', 'Z')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'Z'), ('a', 'z'),
-            ('\u{17F}', '\u{17F}'),
-            ('\u{212A}', '\u{212A}'),
-        ]));
-        let cls = class(&[('a', 'z')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'Z'), ('a', 'z'),
-            ('\u{17F}', '\u{17F}'),
-            ('\u{212A}', '\u{212A}'),
-        ]));
-
-        let cls = bclass(&[(b'A', b'Z')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'Z'), (b'a', b'z'),
-        ]));
-        let cls = bclass(&[(b'a', b'z')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'Z'), (b'a', b'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_a_underscore() {
-        let cls = class(&[('A', 'A'), ('_', '_')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('A', 'A'), ('_', '_'),
-        ]));
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'A'), ('_', '_'), ('a', 'a'),
-        ]));
-
-        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[
-            (b'A', b'A'), (b'_', b'_'),
-        ]));
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'A'), (b'_', b'_'), (b'a', b'a'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_a_equals() {
-        let cls = class(&[('A', 'A'), ('=', '=')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('=', '='), ('A', 'A'),
-        ]));
-        assert_eq!(cls.case_fold(), class(&[
-            ('=', '='), ('A', 'A'), ('a', 'a'),
-        ]));
-
-        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[
-            (b'=', b'='), (b'A', b'A'),
-        ]));
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'=', b'='), (b'A', b'A'), (b'a', b'a'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_no_folding_needed() {
-        let cls = class(&[('\x00', '\x10')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('\x00', '\x10'),
-        ]));
-
-        let cls = bclass(&[(b'\x00', b'\x10')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'\x00', b'\x10'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_negated() {
-        let cls = class(&[('x', 'x')]);
-        assert_eq!(cls.clone().case_fold(), class(&[
-            ('X', 'X'), ('x', 'x'),
-        ]));
-        assert_eq!(cls.case_fold().negate(), class(&[
-            ('\x00', 'W'), ('Y', 'w'), ('y', '\u{10FFFF}'),
-        ]));
-
-        let cls = bclass(&[(b'x', b'x')]);
-        assert_eq!(cls.clone().case_fold(), bclass(&[
-            (b'X', b'X'), (b'x', b'x'),
-        ]));
-        assert_eq!(cls.case_fold().negate(), bclass(&[
-            (b'\x00', b'W'), (b'Y', b'w'), (b'y', b'\xff'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_single_to_multiple() {
-        let cls = class(&[('k', 'k')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
-        ]));
-
-        let cls = bclass(&[(b'k', b'k')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'K', b'K'), (b'k', b'k'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_at() {
-        let cls = class(&[('@', '@')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[('@', '@')]));
-        assert_eq!(cls.case_fold(), class(&[('@', '@')]));
-
-        let cls = bclass(&[(b'@', b'@')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[(b'@', b'@')]));
-        assert_eq!(cls.case_fold(), bclass(&[(b'@', b'@')]));
-    }
-
-    #[test]
-    fn is_word_char() {
-        use super::is_word_char;
-        assert!(is_word_char('a'), "ASCII");
-        assert!(is_word_char('à'), "Latin-1");
-        assert!(is_word_char('\u{11011}'), "Brahmi (Unicode 6.0)");
-        assert!(is_word_char('\u{11611}'), "Modi (Unicode 7.0)");
-        assert!(is_word_char('\u{11711}'), "Ahom (Unicode 8.0)");
-        assert!(is_word_char('\u{17828}'), "Tangut (Unicode 9.0)");
-        assert!(is_word_char('\u{1B1B1}'), "Nushu (Unicode 10.0)");
-    }
-
-    #[test]
-    fn roundtrip_class_hypen() {
-        let expr = e("[-./]");
-        assert_eq!("(?u:[-\\.-/])", expr.to_string());
-
-        let expr = e("(?-u)[-./]");
-        assert_eq!("(?-u:[-\\.-/])", expr.to_string());
-    }
+    fn word() {
+        assert!(is_word_byte(b'a'));
+        assert!(!is_word_byte(b'-'));
 
-    fn assert_intersection(cls1: CharClass, cls2: CharClass, expected: CharClass) {
-        // intersection operation should be commutative
-        assert_eq!(cls1.intersection(&cls2), expected);
-        assert_eq!(cls2.intersection(&cls1), expected);
+        assert!(is_word_character('a'));
+        assert!(is_word_character('β'));
+        assert!(!is_word_character('-'));
+        assert!(!is_word_character('☃'));
     }
 }
diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs
index be595e578c..2afd2fe234 100644
--- a/regex-syntax/src/parser.rs
+++ b/regex-syntax/src/parser.rs
@@ -1,3311 +1,201 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
+use ast;
+use hir;
 
-use std::cmp::{max, min};
-use std::u8;
+use Result;
 
-use unicode::regex::UNICODE_CLASSES;
-
-use {
-    Expr, Repeater, CharClass, ClassRange,
-    CaptureIndex, CaptureName,
-    Error, ErrorKind, Result,
-};
-
-/// Parser state.
+/// A builder for a regular expression parser.
 ///
-/// Keeps the entire input in memory and maintains a cursor (char offset).
+/// This builder permits modifying configuration options for the parser.
 ///
-/// It also keeps an expression stack, which is responsible for managing
-/// grouped expressions and flag state.
-#[derive(Debug)]
-pub struct Parser {
-    chars: Vec<char>,
-    chari: usize,
-    stack: Vec<Build>,
-    caps: usize,
-    names: Vec<String>, // to check for duplicates
-    flags: Flags,
-}
-
-/// Flag state used in the parser.
-#[derive(Clone, Copy, Debug)]
-pub struct Flags {
-    /// i
-    pub casei: bool,
-    /// m
-    pub multi: bool,
-    /// s
-    pub dotnl: bool,
-    /// U
-    pub swap_greed: bool,
-    /// x
-    pub ignore_space: bool,
-    /// u
-    pub unicode: bool,
-    /// Not actually a flag, but when disabled, every regex that may not match
-    /// UTF-8 exclusively will cause the parser to return an error.
-    pub allow_bytes: bool,
+/// This type combines the builder options for both the
+/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
+/// and the
+/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+#[derive(Clone, Debug, Default)]
+pub struct ParserBuilder {
+    ast: ast::parse::ParserBuilder,
+    hir: hir::translate::TranslatorBuilder,
 }
 
-impl Default for Flags {
-    fn default() -> Self {
-        Flags {
-            casei: false,
-            multi: false,
-            dotnl: false,
-            swap_greed: false,
-            ignore_space: false,
-            unicode: true,
-            allow_bytes: false,
-        }
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder::default()
     }
-}
 
-/// An ephemeral type for representing the expression stack.
+    /// Build a parser from this configuration.
+    pub fn build(&self) -> Parser {
+        Parser {
+            ast: self.ast.build(),
+            hir: self.hir.build(),
+        }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax, therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.ast.nest_limit(limit);
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.octal(yes);
+        self
+    }
+
+    /// When enabled, the parser will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the parser is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// parser will return an error).
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.allow_invalid_utf8(yes);
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.ignore_whitespace(yes);
+        self
+    }
+
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.case_insensitive(yes);
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.multi_line(yes);
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut ParserBuilder {
+        self.hir.dot_matches_new_line(yes);
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.swap_greed(yes);
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.unicode(yes);
+        self
+    }
+}
+
+/// A convenience parser for regular expressions.
 ///
-/// Everything on the stack is either a regular expression or a marker
-/// indicating the opening of a group (possibly non-capturing). The opening
-/// of a group copies the current flag state, which is reset on the parser
-/// state once the group closes.
-#[derive(Debug)]
-enum Build {
-    Expr(Expr),
-    LeftParen {
-        i: CaptureIndex,
-        name: CaptureName,
-        chari: usize,
-        old_flags: Flags,
-    },
-}
-
-/// A type for representing the elements of a bracket stack used for parsing
-/// character classes.
+/// This parser takes as input a regular expression pattern string (the
+/// "concrete syntax") and returns a high-level intermediate representation
+/// (the HIR) suitable for most types of analysis. In particular, this parser
+/// hides the intermediate state of producing an AST (the "abstract syntax").
+/// The AST is itself far more complex than the HIR, so this parser serves as a
+/// convenience for never having to deal with it at all.
 ///
-/// This is for parsing nested character classes without recursion.
-#[derive(Debug)]
-enum Bracket {
-    /// The opening of a character class (possibly negated)
-    LeftBracket {
-        negated: bool,
-    },
-    /// A set of characters within a character class, e.g., `a-z`
-    Set(CharClass),
-    /// An intersection operator (`&&`)
-    Intersection,
-}
-
-// Primary expression parsing routines.
-impl Parser {
-    pub fn parse(s: &str, flags: Flags) -> Result<Expr> {
-        Parser {
-            chars: s.chars().collect(),
-            chari: 0,
-            stack: vec![],
-            caps: 0,
-            names: vec![],
-            flags: flags,
-        }.parse_expr()
-    }
-
-    // Top-level expression parser.
-    //
-    // Starts at the beginning of the input and consumes until either the end
-    // of input or an error.
-    fn parse_expr(mut self) -> Result<Expr> {
-        loop {
-            self.ignore_space();
-            if self.eof() {
-                break;
-            }
-            let build_expr = match self.cur() {
-                '\\' => try!(self.parse_escape()),
-                '|' => { let e = try!(self.alternate()); self.bump(); e }
-                '?' => try!(self.parse_simple_repeat(Repeater::ZeroOrOne)),
-                '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)),
-                '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)),
-                '{' => try!(self.parse_counted_repeat()),
-                '[' => try!(self.parse_class()),
-                '^' => {
-                    if self.flags.multi {
-                        self.parse_one(Expr::StartLine)
-                    } else {
-                        self.parse_one(Expr::StartText)
-                    }
-                }
-                '$' => {
-                    if self.flags.multi {
-                        self.parse_one(Expr::EndLine)
-                    } else {
-                        self.parse_one(Expr::EndText)
-                    }
-                }
-                '.' => {
-                    if self.flags.dotnl {
-                        if self.flags.unicode {
-                            self.parse_one(Expr::AnyChar)
-                        } else {
-                            if !self.flags.allow_bytes {
-                                return Err(self.err(ErrorKind::InvalidUtf8));
-                            }
-                            self.parse_one(Expr::AnyByte)
-                        }
-                    } else {
-                        if self.flags.unicode {
-                            self.parse_one(Expr::AnyCharNoNL)
-                        } else {
-                            if !self.flags.allow_bytes {
-                                return Err(self.err(ErrorKind::InvalidUtf8));
-                            }
-                            self.parse_one(Expr::AnyByteNoNL)
-                        }
-                    }
-                }
-                '(' => try!(self.parse_group()),
-                ')' => {
-                    let (old_flags, e) = try!(self.close_paren());
-                    self.bump();
-                    self.flags = old_flags;
-                    e
-                }
-                _ => {
-                    let c = self.bump();
-                    try!(self.lit(c))
-                }
-            };
-            if !build_expr.is_empty() {
-                self.stack.push(build_expr);
-            }
-        }
-        self.finish_concat()
-    }
-
-    // Parses an escape sequence, e.g., \Ax
-    //
-    // Start: `\`
-    // End:   `x`
-    fn parse_escape(&mut self) -> Result<Build> {
-        self.bump();
-        if self.eof() {
-            return Err(self.err(ErrorKind::UnexpectedEscapeEof));
-        }
-        let c = self.cur();
-        if is_punct(c) || (self.flags.ignore_space && c.is_whitespace()) {
-            let c = self.bump();
-            return Ok(try!(self.lit(c)));
-        }
-        match c {
-            'a' => { self.bump(); Ok(try!(self.lit('\x07'))) }
-            'f' => { self.bump(); Ok(try!(self.lit('\x0C'))) }
-            't' => { self.bump(); Ok(try!(self.lit('\t'))) }
-            'n' => { self.bump(); Ok(try!(self.lit('\n'))) }
-            'r' => { self.bump(); Ok(try!(self.lit('\r'))) }
-            'v' => { self.bump(); Ok(try!(self.lit('\x0B'))) }
-            'A' => { self.bump(); Ok(Build::Expr(Expr::StartText)) }
-            'z' => { self.bump(); Ok(Build::Expr(Expr::EndText)) }
-            'b' => {
-                self.bump();
-                Ok(Build::Expr(if self.flags.unicode {
-                    Expr::WordBoundary
-                } else {
-                    Expr::WordBoundaryAscii
-                }))
-            }
-            'B' => {
-                self.bump();
-                Ok(Build::Expr(if self.flags.unicode {
-                    Expr::NotWordBoundary
-                } else {
-                    Expr::NotWordBoundaryAscii
-                }))
-            }
-            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => self.parse_octal(),
-            'x' => { self.bump(); self.parse_hex() }
-            'p'|'P' => {
-                self.bump();
-                self.parse_unicode_class(c == 'P')
-                    .map(|cls| Build::Expr(Expr::Class(cls)))
-            }
-            'd'|'s'|'w'|'D'|'S'|'W' => {
-                self.bump();
-                Ok(Build::Expr(Expr::Class(self.parse_perl_class(c))))
-            }
-            c => Err(self.err(ErrorKind::UnrecognizedEscape(c))),
-        }
-    }
-
-    // Parses a group, e.g., `(abc)`.
-    //
-    // Start: `(`
-    // End:   `a`
-    //
-    // A more interesting example, `(?P<foo>abc)`.
-    //
-    // Start: `(`
-    // End:   `a`
-    fn parse_group(&mut self) -> Result<Build> {
-        let chari = self.chari;
-        let mut name: CaptureName = None;
-        self.bump();
-        self.ignore_space();
-        if self.bump_if("?P<") {
-            let n = try!(self.parse_group_name());
-            if self.names.iter().any(|n2| n2 == &n) {
-                return Err(self.err(ErrorKind::DuplicateCaptureName(n)));
-            }
-            self.names.push(n.clone());
-            name = Some(n);
-        } else if self.bump_if("?") {
-            // This can never be capturing. It's either setting flags for
-            // the current group, or it's opening a non-capturing group or
-            // it's opening a group with a specific set of flags (which is
-            // also non-capturing).
-            // Anything else is an error.
-            return self.parse_group_flags(chari);
-        }
-        self.caps = checkadd(self.caps, 1);
-        Ok(Build::LeftParen {
-            i: Some(self.caps),
-            name: name,
-            chari: chari,
-            old_flags: self.flags, // no flags changed if we're here
-        })
-    }
-
-    // Parses flags (inline or grouped), e.g., `(?s-i:abc)`.
-    //
-    // Start: `s`
-    // End:   `a`
-    //
-    // Another example, `(?s-i)a`.
-    //
-    // Start: `s`
-    // End:   `a`
-    fn parse_group_flags(&mut self, opening_chari: usize) -> Result<Build> {
-        let old_flags = self.flags;
-        let mut sign = true;
-        let mut saw_flag = false;
-        loop {
-            if self.eof() {
-                // e.g., (?i
-                return Err(self.err(ErrorKind::UnexpectedFlagEof));
-            }
-            match self.cur() {
-                'i' => { self.flags.casei = sign; saw_flag = true }
-                'm' => { self.flags.multi = sign; saw_flag = true }
-                's' => { self.flags.dotnl = sign; saw_flag = true }
-                'U' => { self.flags.swap_greed = sign; saw_flag = true }
-                'x' => { self.flags.ignore_space = sign; saw_flag = true }
-                'u' => { self.flags.unicode = sign; saw_flag = true }
-                '-' => {
-                    if !sign {
-                        // e.g., (?-i-s)
-                        return Err(self.err(ErrorKind::DoubleFlagNegation));
-                    }
-                    sign = false;
-                    saw_flag = false;
-                }
-                ')' => {
-                    if !saw_flag {
-                        // e.g., (?)
-                        return Err(self.err(ErrorKind::EmptyFlagNegation));
-                    }
-                    // At this point, we're just changing the flags inside
-                    // the current group, which means the old flags have
-                    // been saved elsewhere. Our modifications in place are
-                    // okey dokey!
-                    //
-                    // This particular flag expression only has a stateful
-                    // impact on a regex's AST, so nothing gets explicitly
-                    // added.
-                    self.bump();
-                    return Ok(Build::Expr(Expr::Empty));
-                }
-                ':' => {
-                    if !sign && !saw_flag {
-                        // e.g., (?i-:a)
-                        // Note that if there's no negation, it's OK not
-                        // to see flag, because you end up with a regular
-                        // non-capturing group: `(?:a)`.
-                        return Err(self.err(ErrorKind::EmptyFlagNegation));
-                    }
-                    self.bump();
-                    return Ok(Build::LeftParen {
-                        i: None,
-                        name: None,
-                        chari: opening_chari,
-                        old_flags: old_flags,
-                    });
-                }
-                // e.g., (?z:a)
-                c => return Err(self.err(ErrorKind::UnrecognizedFlag(c))),
-            }
-            self.bump();
-        }
-    }
-
-    // Parses a group name, e.g., `foo` in `(?P<foo>abc)`.
-    //
-    // Start: `f`
-    // End:   `a`
-    fn parse_group_name(&mut self) -> Result<String> {
-        let mut name = String::new();
-        while !self.eof() && !self.peek_is('>') {
-            name.push(self.bump());
-        }
-        if self.eof() {
-            // e.g., (?P<a
-            return Err(self.err(ErrorKind::UnclosedCaptureName(name)));
-        }
-        let all_valid = name.chars().all(is_valid_capture_char);
-        match name.chars().next() {
-            // e.g., (?P<>a)
-            None => Err(self.err(ErrorKind::EmptyCaptureName)),
-            Some(c) if (c >= '0' && c <= '9') || !all_valid => {
-                // e.g., (?P<a#>x)
-                // e.g., (?P<1a>x)
-                Err(self.err(ErrorKind::InvalidCaptureName(name)))
-            }
-            _ => {
-                self.bump(); // for `>`
-                Ok(name)
-            }
-        }
-    }
-
-    // Parses a counted repeition operator, e.g., `a{2,4}?z`.
-    //
-    // Start: `{`
-    // End:   `z`
-    fn parse_counted_repeat(&mut self) -> Result<Build> {
-        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., ({5}
-        if !e.can_repeat() {
-            // e.g., a*{5}
-            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
-        }
-        self.bump();
-        self.ignore_space();
-        let min = try!(self.parse_decimal());
-        let mut max_opt = Some(min);
-        self.ignore_space();
-        if self.bump_if(',') {
-            self.ignore_space();
-            if self.peek_is('}') {
-                max_opt = None;
-            } else {
-                let max = try!(self.parse_decimal());
-                if min > max {
-                    // e.g., a{2,1}
-                    return Err(self.err(ErrorKind::InvalidRepeatRange {
-                        min: min,
-                        max: max,
-                    }));
-                }
-                max_opt = Some(max);
-            }
-        }
-        self.ignore_space();
-        if !self.bump_if('}') {
-            Err(self.err(ErrorKind::UnclosedRepeat))
-        } else {
-            Ok(Build::Expr(Expr::Repeat {
-                e: Box::new(e),
-                r: Repeater::Range { min: min, max: max_opt },
-                greedy: !self.bump_if('?') ^ self.flags.swap_greed,
-            }))
-        }
-    }
-
-    // Parses a simple repetition operator, e.g., `a+?z`.
-    //
-    // Start: `+`
-    // End:   `z`
-    //
-    // N.B. "simple" in this context means "not min/max repetition",
-    // e.g., `a{1,2}`.
-    fn parse_simple_repeat(&mut self, rep: Repeater) -> Result<Build> {
-        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., (*
-        if !e.can_repeat() {
-            // e.g., a**
-            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
-        }
-        self.bump();
-        Ok(Build::Expr(Expr::Repeat {
-            e: Box::new(e),
-            r: rep,
-            greedy: !self.bump_if('?') ^ self.flags.swap_greed,
-        }))
-    }
-
-    // Parses a decimal number until the given character, e.g., `a{123,456}`.
-    //
-    // Start: `1`
-    // End:   `,` (where `until == ','`)
-    fn parse_decimal(&mut self) -> Result<u32> {
-        match self.bump_get(|c| is_ascii_word(c) || c.is_whitespace()) {
-            // e.g., a{}
-            None => Err(self.err(ErrorKind::MissingBase10)),
-            Some(n) => {
-                // e.g., a{xyz
-                // e.g., a{9999999999}
-                let n = n.trim();
-                u32::from_str_radix(n, 10)
-                    .map_err(|_| self.err(ErrorKind::InvalidBase10(n.into())))
-            }
-        }
-    }
-
-    // Parses an octal number, up to 3 digits, e.g., `a\123b`
-    //
-    // Start: `1`
-    // End:   `b`
-    fn parse_octal(&mut self) -> Result<Build> {
-        use std::char;
-        let mut i = 0; // counter for limiting octal to 3 digits.
-        let n = self.bump_get(|c| { i += 1; i <= 3 && c >= '0' && c <= '7' })
-                    .expect("octal string"); // guaranteed at least 1 digit
-        // I think both of the following unwraps are impossible to fail.
-        // We limit it to a three digit octal number, which maxes out at
-        // `0777` or `511` in decimal. Since all digits are in `0...7`, we'll
-        // always have a valid `u32` number. Moreover, since all numbers in
-        // the range `0...511` are valid Unicode scalar values, it will always
-        // be a valid `char`.
-        //
-        // Hence, we `unwrap` with reckless abandon.
-        let n = u32::from_str_radix(&n, 8).ok().expect("valid octal number");
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = char::from_u32(n).expect("Unicode scalar value");
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a hex number, e.g., `a\x5ab`.
-    //
-    // Start: `5`
-    // End:   `b`
-    //
-    // And also, `a\x{2603}b`.
-    //
-    // Start: `{`
-    // End:   `b`
-    fn parse_hex(&mut self) -> Result<Build> {
-        self.ignore_space();
-        if self.bump_if('{') {
-            self.parse_hex_many_digits()
-        } else {
-            self.parse_hex_two_digits()
-        }
-    }
-
-    // Parses a many-digit hex number, e.g., `a\x{2603}b`.
-    //
-    // Start: `2`
-    // End:   `b`
-    fn parse_hex_many_digits(&mut self) -> Result<Build> {
-        use std::char;
-
-        self.ignore_space();
-        let s = self.bump_get(is_ascii_word).unwrap_or("".into());
-        let n = try!(u32::from_str_radix(&s, 16)
-                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
-        self.ignore_space();
-        if !self.bump_if('}') {
-            // e.g., a\x{d
-            return Err(self.err(ErrorKind::UnclosedHex));
-        }
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = try!(char::from_u32(n)
-                          .ok_or(self.err(ErrorKind::InvalidScalarValue(n))));
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a two-digit hex number, e.g., `a\x5ab`.
-    //
-    // Start: `5`
-    // End:   `b`
-    fn parse_hex_two_digits(&mut self) -> Result<Build> {
-        use std::char;
-
-        let mut i = 0;
-        let s = self.bump_get(|_| { i += 1; i <= 2 }).unwrap_or("".into());
-        if s.len() < 2 {
-            // e.g., a\x
-            // e.g., a\xf
-            return Err(self.err(ErrorKind::UnexpectedTwoDigitHexEof));
-        }
-        let n = try!(u32::from_str_radix(&s, 16)
-                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = char::from_u32(n).expect("Unicode scalar value");
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a character class, e.g., `[^a-zA-Z0-9]+`.
-    //
-    // If the Unicode flag is enabled, the class is returned as a `CharClass`,
-    // otherwise it is converted to a `ByteClass`.
-    //
-    // Start: `[`
-    // End:   `+`
-    fn parse_class(&mut self) -> Result<Build> {
-        let class = try!(self.parse_class_as_chars());
-        Ok(Build::Expr(if self.flags.unicode {
-            Expr::Class(class)
-        } else {
-            let byte_class = class.to_byte_class();
-
-            // If `class` was only non-empty due to multibyte characters, the
-            // corresponding byte class will now be empty.
-            //
-            // See https://github.com/rust-lang/regex/issues/303
-            if byte_class.is_empty() {
-                // e.g., (?-u)[^\x00-\xFF]
-                return Err(self.err(ErrorKind::EmptyClass));
-            }
-
-            Expr::ClassBytes(byte_class)
-        }))
-    }
-
-    // Parses a character class as a `CharClass`, e.g., `[^a-zA-Z0-9]+`.
-    //
-    // Start: `[`
-    // End:   `+`
-    fn parse_class_as_chars(&mut self) -> Result<CharClass> {
-        let mut bracket_stack = vec![];
-        bracket_stack.extend(self.parse_open_bracket());
-        loop {
-            self.ignore_space();
-            if self.eof() {
-                // e.g., [a
-                return Err(self.err(ErrorKind::UnexpectedClassEof));
-            }
-            match self.cur() {
-                '[' => {
-                    if let Some(class) = self.maybe_parse_ascii() {
-                        // e.g. `[:alnum:]`
-                        bracket_stack.push(Bracket::Set(class));
-                    } else {
-                        // nested set, e.g. `[c-d]` in `[a-b[c-d]]`
-                        bracket_stack.extend(self.parse_open_bracket());
-                    }
-                }
-                ']' => {
-                    self.bump();
-                    let class = try!(self.close_bracket(&mut bracket_stack));
-                    if bracket_stack.is_empty() {
-                        // That was the outermost class, so stop now
-                        return Ok(class);
-                    }
-                    bracket_stack.push(Bracket::Set(class));
-                }
-                '\\' => {
-                    let class = try!(self.parse_class_escape());
-                    bracket_stack.push(Bracket::Set(class));
-                }
-                '&' if self.peek_is("&&") => {
-                    self.bump();
-                    self.bump();
-                    bracket_stack.push(Bracket::Intersection);
-                }
-                start => {
-                    if !self.flags.unicode {
-                        let _ = try!(self.codepoint_to_one_byte(start));
-                    }
-                    self.bump();
-                    match start {
-                        '~'|'-' => {
-                            // Only report an error if we see ~~ or --.
-                            if self.peek_is(start) {
-                                return Err(self.err(
-                                    ErrorKind::UnsupportedClassChar(start)));
-                            }
-                        }
-                        _ => {}
-                    }
-                    let class = try!(self.parse_class_range(start));
-                    bracket_stack.push(Bracket::Set(class));
-                }
-            }
-        }
-    }
-
-    // Parses the start of a character class or a nested character class.
-    // That includes negation using `^` and unescaped `-` and `]` allowed at
-    // the start of the class.
-    //
-    // e.g., `[^a]` or `[-a]` or `[]a]`
-    //
-    // Start: `[`
-    // End:   `a`
-    fn parse_open_bracket(&mut self) -> Vec<Bracket> {
-        self.bump();
-        self.ignore_space();
-        let negated = self.bump_if('^');
-        self.ignore_space();
-
-        let mut class = CharClass::empty();
-        while self.bump_if('-') {
-            class.ranges.push(ClassRange::one('-'));
-            self.ignore_space();
-        }
-        if class.is_empty() {
-            if self.bump_if(']') {
-                class.ranges.push(ClassRange::one(']'));
-                self.ignore_space();
-            }
-        }
-
-        let bracket = Bracket::LeftBracket { negated: negated };
-        if class.is_empty() {
-            vec![bracket]
-        } else {
-            vec![bracket, Bracket::Set(class)]
-        }
-    }
-
-    // Parses an escape in a character class.
-    //
-    // This is a helper for `parse_class`. Instead of returning an `Ok` value,
-    // it either mutates the char class or returns an error.
-    //
-    // e.g., `\wx`
-    //
-    // Start: `\`
-    // End:   `x`
-    fn parse_class_escape(&mut self) -> Result<CharClass> {
-        match try!(self.parse_escape()) {
-            Build::Expr(Expr::Class(class)) => {
-                Ok(class)
-            }
-            Build::Expr(Expr::ClassBytes(class2)) => {
-                let mut class = CharClass::empty();
-                for byte_range in class2 {
-                    let s = byte_range.start as char;
-                    let e = byte_range.end as char;
-                    class.ranges.push(ClassRange::new(s, e));
-                }
-                Ok(class)
-            }
-            Build::Expr(Expr::Literal { chars, .. }) => {
-                self.parse_class_range(chars[0])
-            }
-            Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
-                let start = bytes[0] as char;
-                self.parse_class_range(start)
-            }
-            Build::Expr(e) => {
-                let err = ErrorKind::InvalidClassEscape(e);
-                Err(self.err(err))
-            }
-            // Because `parse_escape` can never return `LeftParen`.
-            _ => unreachable!(),
-        }
-    }
-
-    // Parses a single range in a character class.
-    //
-    // e.g., `[a-z]`
-    //
-    // Start: `-` (with start == `a`)
-    // End:   `]`
-    fn parse_class_range(&mut self, start: char) -> Result<CharClass> {
-        self.ignore_space();
-        if !self.bump_if('-') {
-            // Not a range, so just return a singleton range.
-            return Ok(CharClass::new(vec![ClassRange::one(start)]));
-        }
-        self.ignore_space();
-        if self.eof() {
-            // e.g., [a-
-            return Err(self.err(ErrorKind::UnexpectedClassEof));
-        }
-        if self.peek_is(']') {
-            // This is the end of the class, so we permit use of `-` as a
-            // regular char (just like we do in the beginning).
-            return Ok(CharClass::new(vec![ClassRange::one(start), ClassRange::one('-')]));
-        }
-
-        // We have a real range. Just need to check to parse literal and
-        // make sure it's a valid range.
-        let end = match self.cur() {
-            '\\' => match try!(self.parse_escape()) {
-                Build::Expr(Expr::Literal { chars, .. }) => {
-                    chars[0]
-                }
-                Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
-                    bytes[0] as char
-                }
-                Build::Expr(e) => {
-                    return Err(self.err(ErrorKind::InvalidClassEscape(e)));
-                }
-                // Because `parse_escape` can never return `LeftParen`.
-                _ => unreachable!(),
-            },
-            c => {
-                self.bump();
-                if c == '-' {
-                    return Err(self.err(ErrorKind::UnsupportedClassChar('-')));
-                }
-                if !self.flags.unicode {
-                    let _ = try!(self.codepoint_to_one_byte(c));
-                }
-                c
-            }
-        };
-        if end < start {
-            // e.g., [z-a]
-            return Err(self.err(ErrorKind::InvalidClassRange {
-                start: start,
-                end: end,
-            }));
-        }
-        Ok(CharClass::new(vec![ClassRange::new(start, end)]))
-    }
-
-    // Parses an ASCII class, e.g., `[:alnum:]+`.
-    //
-    // Start: `[`
-    // End:   `+`
-    //
-    // Also supports negation, e.g., `[:^alnum:]`.
-    //
-    // This parsing routine is distinct from the others in that it doesn't
-    // actually report any errors. Namely, if it fails, then the parser should
-    // fall back to parsing a regular class.
-    //
-    // This method will only make progress in the parser if it succeeds.
-    // Otherwise, the input remains where it started.
-    fn maybe_parse_ascii(&mut self) -> Option<CharClass> {
-        fn parse(p: &mut Parser) -> Option<CharClass> {
-            p.bump(); // the `[`
-            if !p.bump_if(':') { return None; }
-            let negate = p.bump_if('^');
-            let name = match p.bump_get(|c| c != ':') {
-                None => return None,
-                Some(name) => name,
-            };
-            if !p.bump_if(":]") { return None; }
-            ascii_class(&name).map(|cls| p.class_transform(negate, cls))
-        }
-        let start = self.chari;
-        match parse(self) {
-            None => { self.chari = start; None }
-            result => result,
-        }
-    }
-
-    // Parses a Uncode class name, e.g., `a\pLb`.
-    //
-    // Start: `L`
-    // End:   `b`
-    //
-    // And also, `a\p{Greek}b`.
-    //
-    // Start: `{`
-    // End:   `b`
-    //
-    // `negate` is true when the class name is used with `\P`.
-    fn parse_unicode_class(&mut self, neg: bool) -> Result<CharClass> {
-        self.ignore_space();
-        let name =
-            if self.bump_if('{') {
-                self.ignore_space();
-                let n = self.bump_get(is_ascii_word).unwrap_or("".into());
-                self.ignore_space();
-                if n.is_empty() || !self.bump_if('}') {
-                    // e.g., \p{Greek
-                    return Err(self.err(ErrorKind::UnclosedUnicodeName));
-                }
-                n
-            } else {
-                if self.eof() {
-                    // e.g., \p
-                    return Err(self.err(ErrorKind::UnexpectedEscapeEof));
-                }
-                self.bump().to_string()
-            };
-        match unicode_class(&name) {
-            None => Err(self.err(ErrorKind::UnrecognizedUnicodeClass(name))),
-            Some(cls) => {
-                if self.flags.unicode {
-                    Ok(self.class_transform(neg, cls))
-                } else {
-                    Err(self.err(ErrorKind::UnicodeNotAllowed))
-                }
-            }
-        }
-    }
-
-    // Parses a perl character class with Unicode support.
-    //
-    // `name` must be one of d, s, w, D, S, W. If not, this function panics.
-    //
-    // No parser state is changed.
-    fn parse_perl_class(&mut self, name: char) -> CharClass {
-        use unicode::regex::{PERLD, PERLS, PERLW};
-        let (cls, negate) = match (self.flags.unicode, name) {
-            (true, 'd') => (raw_class_to_expr(PERLD), false),
-            (true, 'D') => (raw_class_to_expr(PERLD), true),
-            (true, 's') => (raw_class_to_expr(PERLS), false),
-            (true, 'S') => (raw_class_to_expr(PERLS), true),
-            (true, 'w') => (raw_class_to_expr(PERLW), false),
-            (true, 'W') => (raw_class_to_expr(PERLW), true),
-            (false, 'd') => (ascii_class("digit").unwrap(), false),
-            (false, 'D') => (ascii_class("digit").unwrap(), true),
-            (false, 's') => (ascii_class("space").unwrap(), false),
-            (false, 'S') => (ascii_class("space").unwrap(), true),
-            (false, 'w') => (ascii_class("word").unwrap(), false),
-            (false, 'W') => (ascii_class("word").unwrap(), true),
-            _ => unreachable!(),
-        };
-        self.class_transform(negate, cls)
-    }
-
-    // Always bump to the next input and return the given expression as a
-    // `Build`.
-    //
-    // This is mostly for convenience when the surrounding context implies
-    // that the next character corresponds to the given expression.
-    fn parse_one(&mut self, e: Expr) -> Build {
-        self.bump();
-        Build::Expr(e)
-    }
-}
-
-// Auxiliary helper methods.
-impl Parser {
-    fn chars(&self) -> Chars {
-        Chars::new(&self.chars[self.chari..])
-    }
-
-    fn ignore_space(&mut self) {
-        if !self.flags.ignore_space {
-            return;
-        }
-        while !self.eof() {
-            match self.cur() {
-                '#' => {
-                    self.bump();
-                    while !self.eof() {
-                        match self.bump() {
-                            '\n' => break,
-                            _ => continue,
-                        }
-                    }
-                },
-                c => if !c.is_whitespace() {
-                    return;
-                } else {
-                    self.bump();
-                }
-            }
-        }
-    }
-
-    fn bump(&mut self) -> char {
-        let c = self.cur();
-        self.chari = checkadd(self.chari, self.chars().next_count());
-        c
-    }
-
-    fn cur(&self) -> char { self.chars().next().unwrap() }
-
-    fn eof(&self) -> bool { self.chars().next().is_none() }
-
-    fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
-        let n = s.match_end(self);
-        if n == 0 {
-            None
-        } else {
-            let end = checkadd(self.chari, n);
-            let s = self.chars[self.chari..end]
-                        .iter().cloned().collect::<String>();
-            self.chari = end;
-            Some(s)
-        }
-    }
-
-    fn bump_if<B: Bumpable>(&mut self, s: B) -> bool {
-        let n = s.match_end(self);
-        if n == 0 {
-            false
-        } else {
-            self.chari = checkadd(self.chari, n);
-            true
-        }
-    }
-
-    fn peek_is<B: Bumpable>(&self, s: B) -> bool {
-        s.match_end(self) > 0
-    }
-
-    fn err(&self, kind: ErrorKind) -> Error {
-        self.errat(self.chari, kind)
-    }
-
-    fn errat(&self, pos: usize, kind: ErrorKind) -> Error {
-        Error { pos: pos, surround: self.windowat(pos), kind: kind }
-    }
-
-    fn windowat(&self, pos: usize) -> String {
-        let s = max(5, pos) - 5;
-        let e = min(self.chars.len(), checkadd(pos, 5));
-        self.chars[s..e].iter().cloned().collect()
-    }
-
-    fn pop(&mut self, expected: ErrorKind) -> Result<Expr> {
-        match self.stack.pop() {
-            None | Some(Build::LeftParen{..}) => Err(self.err(expected)),
-            Some(Build::Expr(e)) => Ok(e),
-        }
-    }
-
-    // If the current context calls for case insensitivity, then apply
-    // case folding. Similarly, if `negate` is `true`, then negate the
-    // class. (Negation always proceeds case folding.)
-    fn class_transform(&self, negate: bool, mut cls: CharClass) -> CharClass {
-        if self.flags.casei {
-            cls = cls.case_fold();
-        }
-        if negate {
-            cls = cls.negate();
-        }
-        cls
-    }
-
-    // Translates a Unicode codepoint into a single UTF-8 byte, and returns an
-    // error if it's not possible.
-    //
-    // This will panic if self.flags.unicode == true.
-    fn codepoint_to_one_byte(&self, c: char) -> Result<u8> {
-        assert!(!self.flags.unicode);
-        let bytes = c.to_string().as_bytes().to_owned();
-        if bytes.len() > 1 {
-            return Err(self.err(ErrorKind::UnicodeNotAllowed));
-        }
-        Ok(bytes[0])
-    }
-
-    // Creates a new byte literal from a single byte.
-    //
-    // If the given number can't fit into a single byte, then it is assumed
-    // to be a Unicode codepoint and an error is returned.
-    //
-    // This should only be called when the bytes flag is enabled.
-    fn u32_to_one_byte(&self, b: u32) -> Result<Build> {
-        assert!(!self.flags.unicode);
-        if b > u8::MAX as u32 {
-            Err(self.err(ErrorKind::UnicodeNotAllowed))
-        } else if !self.flags.allow_bytes && b > 0x7F {
-            Err(self.err(ErrorKind::InvalidUtf8))
-        } else {
-            Ok(Build::Expr(Expr::LiteralBytes {
-                bytes: vec![b as u8],
-                casei: self.flags.casei,
-            }))
-        }
-    }
-
-    // Creates a new literal expr from a Unicode codepoint.
-    //
-    // Creates a byte literal if the `bytes` flag is set.
-    fn lit(&self, c: char) -> Result<Build> {
-        Ok(Build::Expr(if self.flags.unicode {
-            Expr::Literal {
-                chars: vec![c],
-                casei: self.flags.casei,
-            }
-        } else {
-            Expr::LiteralBytes {
-                bytes: vec![try!(self.codepoint_to_one_byte(c))],
-                casei: self.flags.casei,
-            }
-        }))
-    }
-}
-
-struct Chars<'a> {
-    chars: &'a [char],
-    cur: usize,
-}
-
-impl<'a> Iterator for Chars<'a> {
-    type Item = char;
-    fn next(&mut self) -> Option<char> {
-        let x = self.c();
-        self.advance();
-        return x;
-    }
-}
-
-impl<'a> Chars<'a> {
-    fn new(chars: &[char]) -> Chars {
-        Chars {
-            chars: chars,
-            cur: 0,
-        }
-    }
-
-    fn c(&self) -> Option<char> {
-        self.chars.get(self.cur).map(|&c| c)
-    }
-
-    fn advance(&mut self) {
-        self.cur = checkadd(self.cur, 1);
-    }
-
-    fn next_count(&mut self) -> usize {
-        self.next();
-        self.cur
-    }
-}
-
-// Auxiliary methods for manipulating the expression stack.
-impl Parser {
-    // Called whenever an alternate (`|`) is found.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. Pushes an alternation with one arm.
-    //  2. An opening parenthesis is found. Leave the parenthesis
-    //     on the stack and push an alternation with one arm.
-    //  3. An alternate (`|`) is found. Pop the existing alternation,
-    //     add an arm and push the modified alternation.
-    //
-    // Each "arm" in the above corresponds to the concatenation of all
-    // popped expressions.
-    //
-    // In the first two cases, the stack is left in an invalid state
-    // because an alternation with one arm is not allowed. This
-    // particular state will be detected by `finish_concat` and an
-    // error will be reported.
-    //
-    // In none of the cases is an empty arm allowed. If an empty arm
-    // is found, an error is reported.
-    fn alternate(&mut self) -> Result<Build> {
-        let mut concat = vec![];
-        let alts = |es| Ok(Build::Expr(Expr::Alternate(es)));
-        loop {
-            match self.stack.pop() {
-                None => {
-                    if concat.is_empty() {
-                        // e.g., |a
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    return alts(vec![rev_concat(concat)]);
-                }
-                Some(e @ Build::LeftParen{..}) => {
-                    if concat.is_empty() {
-                        // e.g., (|a)
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    self.stack.push(e);
-                    return alts(vec![rev_concat(concat)]);
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., a||
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    return alts(es);
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
-
-    // Called whenever a closing parenthesis (`)`) is found.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. An error is reported because this
-    //     indicates an unopened parenthesis.
-    //  2. An opening parenthesis is found. Pop the opening parenthesis
-    //     and push a `Group` expression.
-    //  3. An alternate (`|`) is found. Pop the existing alternation
-    //     and an arm to it in place. Pop one more item from the stack.
-    //     If the stack was empty, then report an unopened parenthesis
-    //     error, otherwise assume it is an opening parenthesis and
-    //     push a `Group` expression with the popped alternation.
-    //     (We can assume this is an opening parenthesis because an
-    //     alternation either corresponds to the entire Regex or it
-    //     corresponds to an entire group. This is guaranteed by the
-    //     `alternate` method.)
-    //
-    // Each "arm" in the above corresponds to the concatenation of all
-    // popped expressions.
-    //
-    // Empty arms nor empty groups are allowed.
-    fn close_paren(&mut self) -> Result<(Flags, Build)> {
-        let mut concat = vec![];
-        loop {
-            match self.stack.pop() {
-                // e.g., )
-                None => return Err(self.err(ErrorKind::UnopenedParen)),
-                Some(Build::LeftParen { i, name, old_flags, .. }) => {
-                    if concat.is_empty() {
-                        // e.g., ()
-                        return Err(self.err(ErrorKind::EmptyGroup));
-                    }
-                    return Ok((old_flags, Build::Expr(Expr::Group {
-                        e: Box::new(rev_concat(concat)),
-                        i: i,
-                        name: name,
-                    })));
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., (a|)
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    match self.stack.pop() {
-                        // e.g., a|b)
-                        None => return Err(self.err(ErrorKind::UnopenedParen)),
-                        Some(Build::Expr(_)) => unreachable!(),
-                        Some(Build::LeftParen { i, name, old_flags, .. }) => {
-                            return Ok((old_flags, Build::Expr(Expr::Group {
-                                e: Box::new(Expr::Alternate(es)),
-                                i: i,
-                                name: name,
-                            })));
-                        }
-                    }
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
-
-    // Called only when the parser reaches the end of input.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. Return concatenation of popped
-    //     expressions. This concatenation may be empty!
-    //  2. An alternation is found. Pop the alternation and push
-    //     a new arm. Return the alternation as the entire Regex.
-    //     After this, the stack must be empty, or else there is
-    //     an unclosed paren.
-    //
-    // If an opening parenthesis is popped, then an error is
-    // returned since it indicates an unclosed parenthesis.
-    fn finish_concat(&mut self) -> Result<Expr> {
-        let mut concat = vec![];
-        loop {
-            match self.stack.pop() {
-                None => { return Ok(rev_concat(concat)); }
-                Some(Build::LeftParen{ chari, ..}) => {
-                    // e.g., a(b
-                    return Err(self.errat(chari, ErrorKind::UnclosedParen));
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., a|
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    // Make sure there are no opening parens remaining.
-                    match self.stack.pop() {
-                        None => return Ok(Expr::Alternate(es)),
-                        Some(Build::LeftParen{ chari, ..}) => {
-                            // e.g., (a|b
-                            return Err(self.errat(
-                                chari, ErrorKind::UnclosedParen));
-                        }
-                        _ => unreachable!(),
-                    }
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
+/// If callers have more fine grained use cases that need an AST, then please
+/// see the [`ast::parse`](ast/parse/index.html) module.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    ast: ast::parse::Parser,
+    hir: hir::translate::Translator,
 }
 
-// Methods for working with the bracket stack used for character class parsing.
 impl Parser {
-
-    // After parsing a closing bracket `]`, process elements of the bracket
-    // stack until finding the corresponding opening bracket `[`, and return
-    // the combined character class. E.g. with `[^b-f&&ab-c]`:
-    //
-    // 1. Adjacent sets are merged into a single union: `ab-c` -> `a-c`
-    // 2. Unions separated by `&&` are intersected: `b-f` and `a-c` -> `b-c`
-    // 3. Negation is applied if necessary: `b-c` -> negation of `b-c`
-    fn close_bracket(&self, stack: &mut Vec<Bracket>) -> Result<CharClass> {
-        let mut union = CharClass::empty();
-        let mut intersect = vec![];
-        loop {
-            match stack.pop() {
-                Some(Bracket::Set(class)) => {
-                    union.ranges.extend(class);
-                }
-                Some(Bracket::Intersection) => {
-                    let class = self.class_union_transform(union);
-                    intersect.push(class);
-                    union = CharClass::empty();
-                }
-                Some(Bracket::LeftBracket { negated }) => {
-                    let mut class = self.class_union_transform(union);
-                    for c in intersect {
-                        class = class.intersection(&c);
-                    }
-                    // negate after combining all sets (`^` has lower precedence than `&&`)
-                    if negated {
-                        class = class.negate();
-                    }
-                    if class.is_empty() {
-                        // e.g., [^\d\D]
-                        return Err(self.err(ErrorKind::EmptyClass));
-                    }
-                    return Ok(class);
-                }
-                // The first element on the stack is a `LeftBracket`
-                None => unreachable!()
-            }
-        }
-    }
-
-    // Apply case folding if requested on the union character class, and
-    // return a canonicalized class.
-    fn class_union_transform(&self, class: CharClass) -> CharClass {
-        if self.flags.casei {
-            // Case folding canonicalizes too
-            class.case_fold()
-        } else {
-            class.canonicalize()
-        }
-    }
-}
-
-impl Build {
-    fn is_empty(&self) -> bool {
-        match *self {
-            Build::Expr(Expr::Empty) => true,
-            _ => false,
-        }
-    }
-}
-
-// Make it ergonomic to conditionally bump the parser.
-// i.e., `bump_if('a')` or `bump_if("abc")`.
-trait Bumpable {
-    fn match_end(self, p: &Parser) -> usize;
-}
-
-impl Bumpable for char {
-    fn match_end(self, p: &Parser) -> usize {
-        let mut chars = p.chars();
-        if chars.next().map(|c| c == self).unwrap_or(false) {
-            chars.cur
-        } else {
-            0
-        }
-    }
-}
-
-impl<'a> Bumpable for &'a str {
-    fn match_end(self, p: &Parser) -> usize {
-        let mut search = self.chars();
-        let mut rest = p.chars();
-        let mut count = 0;
-        loop {
-            match (rest.next(), search.next()) {
-                (Some(c1), Some(c2)) if c1 == c2 => count = rest.cur,
-                (_, None) => return count,
-                _ => return 0,
-            }
-        }
-    }
-}
-
-impl<F: FnMut(char) -> bool> Bumpable for F {
-    fn match_end(mut self, p: &Parser) -> usize {
-        let mut chars = p.chars();
-        let mut count = 0;
-        while let Some(c) = chars.next() {
-            if !self(c) {
-                break
-            }
-            count = chars.cur;
-        }
-        count
-    }
-}
-
-// Turn a sequence of expressions into a concatenation.
-// This only uses `Concat` if there are 2 or more expressions.
-fn rev_concat(mut exprs: Vec<Expr>) -> Expr {
-    if exprs.len() == 0 {
-        Expr::Empty
-    } else if exprs.len() == 1 {
-        exprs.pop().unwrap()
-    } else {
-        exprs.reverse();
-        Expr::Concat(exprs)
-    }
-}
-
-// Returns true if and only if the given character is allowed in a capture
-// name. Note that the first char of a capture name must not be numeric.
-fn is_valid_capture_char(c: char) -> bool {
-    c == '_' || (c >= '0' && c <= '9')
-    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn is_ascii_word(c: char) -> bool {
-    match c {
-        'a' ... 'z' | 'A' ... 'Z' | '_' | '0' ... '9' => true,
-        _ => false,
-    }
-}
-
-/// Returns true if the give character has significance in a regex.
-pub fn is_punct(c: char) -> bool {
-    match c {
-        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
-        '[' | ']' | '{' | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
-        _ => false,
-    }
-}
-
-fn checkadd(x: usize, y: usize) -> usize {
-    x.checked_add(y).expect("regex length overflow")
-}
-
-fn unicode_class(name: &str) -> Option<CharClass> {
-    UNICODE_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
-        raw_class_to_expr(UNICODE_CLASSES[i].1)
-    })
-}
-
-fn ascii_class(name: &str) -> Option<CharClass> {
-    ASCII_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
-        raw_class_to_expr(ASCII_CLASSES[i].1)
-    })
-}
-
-fn raw_class_to_expr(raw: &[(char, char)]) -> CharClass {
-    let range = |&(s, e)| ClassRange { start: s, end: e };
-    CharClass::new(raw.iter().map(range).collect())
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, Class)];
-
-const ASCII_CLASSES: NamedClasses = &[
-    // Classes must be in alphabetical order so that bsearch works.
-    // [:alnum:]      alphanumeric (== [0-9A-Za-z])
-    // [:alpha:]      alphabetic (== [A-Za-z])
-    // [:ascii:]      ASCII (== [\x00-\x7F])
-    // [:blank:]      blank (== [\t ])
-    // [:cntrl:]      control (== [\x00-\x1F\x7F])
-    // [:digit:]      digits (== [0-9])
-    // [:graph:]      graphical (== [!-~])
-    // [:lower:]      lower case (== [a-z])
-    // [:print:]      printable (== [ -~] == [ [:graph:]])
-    // [:punct:]      punctuation (== [!-/:-@[-`{-~])
-    // [:space:]      whitespace (== [\t\n\v\f\r ])
-    // [:upper:]      upper case (== [A-Z])
-    // [:word:]       word characters (== [0-9A-Za-z_])
-    // [:xdigit:]     hex digit (== [0-9A-Fa-f])
-    // Taken from: http://golang.org/pkg/regex/syntax/
-    ("alnum", &ALNUM),
-    ("alpha", &ALPHA),
-    ("ascii", &ASCII),
-    ("blank", &BLANK),
-    ("cntrl", &CNTRL),
-    ("digit", &DIGIT),
-    ("graph", &GRAPH),
-    ("lower", &LOWER),
-    ("print", &PRINT),
-    ("punct", &PUNCT),
-    ("space", &SPACE),
-    ("upper", &UPPER),
-    ("word", &WORD),
-    ("xdigit", &XDIGIT),
-];
-
-const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
-const ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
-const ASCII: Class = &[('\x00', '\x7F')];
-const BLANK: Class = &[(' ', ' '), ('\t', '\t')];
-const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
-const DIGIT: Class = &[('0', '9')];
-const GRAPH: Class = &[('!', '~')];
-const LOWER: Class = &[('a', 'z')];
-const PRINT: Class = &[(' ', '~')];
-const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
-const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
-                       ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
-const UPPER: Class = &[('A', 'Z')];
-const WORD: Class = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
-const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
-
-#[cfg(test)]
-mod tests {
-    use {
-        CharClass, ClassRange, ByteClass, ByteRange,
-        Expr, Repeater,
-        ErrorKind,
-    };
-    use unicode::regex::{PERLD, PERLS, PERLW};
-    use super::{LOWER, UPPER, WORD, Flags, Parser, ascii_class};
-
-    static YI: &'static [(char, char)] = &[
-        ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
-    ];
-
-    fn p(s: &str) -> Expr { Parser::parse(s, Flags::default()).unwrap() }
-    fn pf(s: &str, flags: Flags) -> Expr { Parser::parse(s, flags).unwrap() }
-    fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } }
-    fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } }
-    fn b<T>(v: T) -> Box<T> { Box::new(v) }
-    fn c(es: &[Expr]) -> Expr { Expr::Concat(es.to_vec()) }
-
-    fn pb(s: &str) -> Expr {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        Parser::parse(s, flags).unwrap()
-    }
-
-    fn blit(b: u8) -> Expr {
-        Expr::LiteralBytes {
-            bytes: vec![b],
-            casei: false,
-        }
-    }
-
-    fn bliti(b: u8) -> Expr {
-        Expr::LiteralBytes {
-            bytes: vec![b],
-            casei: true,
-        }
-    }
-
-    fn class(ranges: &[(char, char)]) -> CharClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
-        CharClass::new(ranges)
-    }
-
-    fn classes(classes: &[&[(char, char)]]) -> CharClass {
-        let mut cls = CharClass::empty();
-        for &ranges in classes {
-            cls.ranges.extend(class(ranges));
-        }
-        cls.canonicalize()
-    }
-
-    fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
-        ByteClass::new(ranges)
-    }
-
-    fn asciid() -> CharClass {
-        ascii_class("digit").unwrap()
-    }
-
-    fn asciis() -> CharClass {
-        ascii_class("space").unwrap()
-    }
-
-    fn asciiw() -> CharClass {
-        ascii_class("word").unwrap()
-    }
-
-    fn asciid_bytes() -> ByteClass {
-        asciid().to_byte_class()
-    }
-
-    fn asciis_bytes() -> ByteClass {
-        asciis().to_byte_class()
-    }
-
-    fn asciiw_bytes() -> ByteClass {
-        asciiw().to_byte_class()
-    }
-
-    #[test]
-    fn empty() {
-        assert_eq!(p(""), Expr::Empty);
-    }
-
-    #[test]
-    fn literal() {
-        assert_eq!(p("a"), lit('a'));
-        assert_eq!(pb("(?-u)a"), blit(b'a'));
-    }
-
-    #[test]
-    fn literal_string() {
-        assert_eq!(p("ab"), Expr::Concat(vec![lit('a'), lit('b')]));
-        assert_eq!(pb("(?-u)ab"), Expr::Concat(vec![blit(b'a'), blit(b'b')]));
-    }
-
-    #[test]
-    fn start_literal() {
-        assert_eq!(p("^a"), Expr::Concat(vec![
-            Expr::StartText,
-            Expr::Literal { chars: vec!['a'], casei: false },
-        ]));
-    }
-
-    #[test]
-    fn repeat_zero_or_one_greedy() {
-        assert_eq!(p("a?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrOne,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_one_greedy_concat() {
-        assert_eq!(p("ab?"), Expr::Concat(vec![
-            lit('a'),
-            Expr::Repeat {
-                e: b(lit('b')),
-                r: Repeater::ZeroOrOne,
-                greedy: true,
-            },
-        ]));
-    }
-
-    #[test]
-    fn repeat_zero_or_one_nongreedy() {
-        assert_eq!(p("a??"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrOne,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_one_or_more_greedy() {
-        assert_eq!(p("a+"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_one_or_more_nongreedy() {
-        assert_eq!(p("a+?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_more_greedy() {
-        assert_eq!(p("a*"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_more_nongreedy() {
-        assert_eq!(p("a*?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_exact() {
-        assert_eq!(p("a{5}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min() {
-        assert_eq!(p("a{5,}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: None },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_max() {
-        assert_eq!(p("a{5,10}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_exact_nongreedy() {
-        assert_eq!(p("a{5}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_nongreedy() {
-        assert_eq!(p("a{5,}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: None },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_max_nongreedy() {
-        assert_eq!(p("a{5,10}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_whitespace() {
-        assert_eq!(p("a{ 5   }"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: true,
-        });
-        assert_eq!(p("a{ 5 , 10 }"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn group_literal() {
-        assert_eq!(p("(a)"), Expr::Group {
-            e: b(lit('a')),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_literal_concat() {
-        assert_eq!(p("(ab)"), Expr::Group {
-            e: b(c(&[lit('a'), lit('b')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_two() {
-        assert_eq!(p("a|b"), Expr::Alternate(vec![lit('a'), lit('b')]));
-    }
-
-    #[test]
-    fn alt_many() {
-        assert_eq!(p("a|b|c"), Expr::Alternate(vec![
-            lit('a'), lit('b'), lit('c'),
-        ]));
-    }
-
-    #[test]
-    fn alt_many_concat() {
-        assert_eq!(p("ab|bc|cd"), Expr::Alternate(vec![
-            c(&[lit('a'), lit('b')]),
-            c(&[lit('b'), lit('c')]),
-            c(&[lit('c'), lit('d')]),
-        ]));
-    }
-
-    #[test]
-    fn alt_group_two() {
-        assert_eq!(p("(a|b)"), Expr::Group {
-            e: b(Expr::Alternate(vec![lit('a'), lit('b')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_many() {
-        assert_eq!(p("(a|b|c)"), Expr::Group {
-            e: b(Expr::Alternate(vec![lit('a'), lit('b'), lit('c')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_many_concat() {
-        assert_eq!(p("(ab|bc|cd)"), Expr::Group {
-            e: b(Expr::Alternate(vec![
-                c(&[lit('a'), lit('b')]),
-                c(&[lit('b'), lit('c')]),
-                c(&[lit('c'), lit('d')]),
-            ])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_nested() {
-        assert_eq!(p("(ab|(bc|(cd)))"), Expr::Group {
-            e: b(Expr::Alternate(vec![
-                c(&[lit('a'), lit('b')]),
-                Expr::Group {
-                    e: b(Expr::Alternate(vec![
-                        c(&[lit('b'), lit('c')]),
-                        Expr::Group {
-                            e: b(c(&[lit('c'), lit('d')])),
-                            i: Some(3),
-                            name: None,
-                        }
-                    ])),
-                    i: Some(2),
-                    name: None,
-                },
-            ])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_name() {
-        assert_eq!(p("(?P<foo>a)"), Expr::Group {
-            e: b(lit('a')),
-            i: Some(1),
-            name: Some("foo".into()),
-        });
-    }
-
-    #[test]
-    fn group_no_capture() {
-        assert_eq!(p("(?:a)"), Expr::Group {
-            e: b(lit('a')),
-            i: None,
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_flags() {
-        assert_eq!(p("(?i:a)"), Expr::Group {
-            e: b(liti('a')),
-            i: None,
-            name: None,
-        });
-        assert_eq!(pb("(?i-u:a)"), Expr::Group {
-            e: b(bliti(b'a')),
-            i: None,
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_flags_returned() {
-        assert_eq!(p("(?i:a)a"), c(&[
-            Expr::Group {
-                e: b(liti('a')),
-                i: None,
-                name: None,
-            },
-            lit('a'),
-        ]));
-        assert_eq!(pb("(?i-u:a)a"), c(&[
-            Expr::Group {
-                e: b(bliti(b'a')),
-                i: None,
-                name: None,
-            },
-            lit('a'),
-        ]));
-    }
-
-    #[test]
-    fn group_flags_retained() {
-        assert_eq!(p("(?i)(?-i:a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: None,
-                name: None,
-            },
-            liti('a'),
-        ]));
-        assert_eq!(pb("(?i-u)(?u-i:a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: None,
-                name: None,
-            },
-            bliti(b'a'),
-        ]));
-    }
-
-    #[test]
-    fn flags_inline() {
-        assert_eq!(p("(?i)a"), liti('a'));
-    }
-
-    #[test]
-    fn flags_inline_multiple() {
-        assert_eq!(p("(?is)a."), c(&[liti('a'), Expr::AnyChar]));
-    }
-
-    #[test]
-    fn flags_inline_multiline() {
-        assert_eq!(p("(?m)^(?-m)$"), c(&[Expr::StartLine, Expr::EndText]));
-    }
-
-    #[test]
-    fn flags_inline_swap_greed() {
-        assert_eq!(p("(?U)a*a*?(?i-U)a*a*?"), c(&[
-            Expr::Repeat {
-                e: b(lit('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: false,
-            },
-            Expr::Repeat {
-                e: b(lit('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: true,
-            },
-            Expr::Repeat {
-                e: b(liti('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: true,
-            },
-            Expr::Repeat {
-                e: b(liti('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: false,
-            },
-        ]));
-    }
-
-    #[test]
-    fn flags_inline_multiple_negate_one() {
-        assert_eq!(p("(?is)a.(?i-s)a."), c(&[
-            liti('a'), Expr::AnyChar, liti('a'), Expr::AnyCharNoNL,
-        ]));
-    }
-
-    #[test]
-    fn any_byte() {
-        assert_eq!(
-            pb("(?-u).(?u)."), c(&[Expr::AnyByteNoNL, Expr::AnyCharNoNL]));
-        assert_eq!(
-            pb("(?s)(?-u).(?u)."), c(&[Expr::AnyByte, Expr::AnyChar]));
-    }
-
-    #[test]
-    fn flags_inline_negate() {
-        assert_eq!(p("(?i)a(?-i)a"), c(&[liti('a'), lit('a')]));
-    }
-
-    #[test]
-    fn flags_group_inline() {
-        assert_eq!(p("(a(?i)a)a"), c(&[
-            Expr::Group {
-                e: b(c(&[lit('a'), liti('a')])),
-                i: Some(1),
-                name: None,
-            },
-            lit('a'),
-        ]));
-    }
-
-    #[test]
-    fn flags_group_inline_retain() {
-        assert_eq!(p("(?i)((?-i)a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: Some(1),
-                name: None,
-            },
-            liti('a'),
-        ]));
-    }
-
-    #[test]
-    fn flags_default_casei() {
-        let flags = Flags { casei: true, .. Flags::default() };
-        assert_eq!(pf("a", flags), liti('a'));
-    }
-
-    #[test]
-    fn flags_default_multi() {
-        let flags = Flags { multi: true, .. Flags::default() };
-        assert_eq!(pf("^", flags), Expr::StartLine);
-    }
-
-    #[test]
-    fn flags_default_dotnl() {
-        let flags = Flags { dotnl: true, .. Flags::default() };
-        assert_eq!(pf(".", flags), Expr::AnyChar);
-    }
-
-    #[test]
-    fn flags_default_swap_greed() {
-        let flags = Flags { swap_greed: true, .. Flags::default() };
-        assert_eq!(pf("a+", flags), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn flags_default_ignore_space() {
-        let flags = Flags { ignore_space: true, .. Flags::default() };
-        assert_eq!(pf(" a ", flags), lit('a'));
-    }
-
-    #[test]
-    fn escape_simple() {
-        assert_eq!(p(r"\a\f\t\n\r\v"), c(&[
-            lit('\x07'), lit('\x0C'), lit('\t'),
-            lit('\n'), lit('\r'), lit('\x0B'),
-        ]));
-    }
-
-    #[test]
-    fn escape_boundaries() {
-        assert_eq!(p(r"\A\z\b\B"), c(&[
-            Expr::StartText, Expr::EndText,
-            Expr::WordBoundary, Expr::NotWordBoundary,
-        ]));
-        assert_eq!(pb(r"(?-u)\b\B"), c(&[
-            Expr::WordBoundaryAscii, Expr::NotWordBoundaryAscii,
-        ]));
-    }
-
-    #[test]
-    fn escape_punctuation() {
-        assert_eq!(p(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
-            lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
-            lit('('), lit(')'), lit('|'), lit('['), lit(']'),
-            lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn escape_octal() {
-        assert_eq!(p(r"\123"), lit('S'));
-        assert_eq!(p(r"\1234"), c(&[lit('S'), lit('4')]));
-
-        assert_eq!(pb(r"(?-u)\377"), blit(0xFF));
-    }
-
-    #[test]
-    fn escape_hex2() {
-        assert_eq!(p(r"\x53"), lit('S'));
-        assert_eq!(p(r"\x534"), c(&[lit('S'), lit('4')]));
-
-        assert_eq!(pb(r"(?-u)\xff"), blit(0xFF));
-        assert_eq!(pb(r"(?-u)\x00"), blit(0x0));
-        assert_eq!(pb(r"(?-u)[\x00]"),
-                   Expr::ClassBytes(bclass(&[(b'\x00', b'\x00')])));
-        assert_eq!(pb(r"(?-u)[^\x00]"),
-                   Expr::ClassBytes(bclass(&[(b'\x01', b'\xFF')])));
-    }
-
-    #[test]
-    fn escape_hex() {
-        assert_eq!(p(r"\x{53}"), lit('S'));
-        assert_eq!(p(r"\x{53}4"), c(&[lit('S'), lit('4')]));
-        assert_eq!(p(r"\x{2603}"), lit('\u{2603}'));
-
-        assert_eq!(pb(r"(?-u)\x{00FF}"), blit(0xFF));
-    }
-
-    #[test]
-    fn escape_unicode_name() {
-        assert_eq!(p(r"\p{Yi}"), Expr::Class(class(YI)));
-    }
-
-    #[test]
-    fn escape_unicode_letter() {
-        assert_eq!(p(r"\pZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ])));
-    }
-
-    #[test]
-    fn escape_unicode_name_case_fold() {
-        assert_eq!(p(r"(?i)\p{Yi}"), Expr::Class(class(YI).case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_case_fold() {
-        assert_eq!(p(r"(?i)\pZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_name_negate() {
-        assert_eq!(p(r"\P{Yi}"), Expr::Class(class(YI).negate()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_negate() {
-        assert_eq!(p(r"\PZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).negate()));
-    }
-
-    #[test]
-    fn escape_unicode_name_negate_case_fold() {
-        assert_eq!(p(r"(?i)\P{Yi}"),
-                   Expr::Class(class(YI).negate().case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_negate_case_fold() {
-        assert_eq!(p(r"(?i)\PZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).negate().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_d() {
-        assert_eq!(p(r"\d"), Expr::Class(class(PERLD)));
-        assert_eq!(pb(r"(?-u)\d"), Expr::Class(asciid()));
-    }
-
-    #[test]
-    fn escape_perl_s() {
-        assert_eq!(p(r"\s"), Expr::Class(class(PERLS)));
-        assert_eq!(pb(r"(?-u)\s"), Expr::Class(asciis()));
-    }
-
-    #[test]
-    fn escape_perl_w() {
-        assert_eq!(p(r"\w"), Expr::Class(class(PERLW)));
-        assert_eq!(pb(r"(?-u)\w"), Expr::Class(asciiw()));
-    }
-
-    #[test]
-    fn escape_perl_d_negate() {
-        assert_eq!(p(r"\D"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(pb(r"(?-u)\D"), Expr::Class(asciid().negate()));
-    }
-
-    #[test]
-    fn escape_perl_s_negate() {
-        assert_eq!(p(r"\S"), Expr::Class(class(PERLS).negate()));
-        assert_eq!(pb(r"(?-u)\S"), Expr::Class(asciis().negate()));
-    }
-
-    #[test]
-    fn escape_perl_w_negate() {
-        assert_eq!(p(r"\W"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(pb(r"(?-u)\W"), Expr::Class(asciiw().negate()));
-    }
-
-    #[test]
-    fn escape_perl_d_case_fold() {
-        assert_eq!(p(r"(?i)\d"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(pb(r"(?i-u)\d"), Expr::Class(asciid().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_s_case_fold() {
-        assert_eq!(p(r"(?i)\s"), Expr::Class(class(PERLS).case_fold()));
-        assert_eq!(pb(r"(?i-u)\s"), Expr::Class(asciis().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_w_case_fold() {
-        assert_eq!(p(r"(?i)\w"), Expr::Class(class(PERLW).case_fold()));
-        assert_eq!(pb(r"(?i-u)\w"), Expr::Class(asciiw().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_d_case_fold_negate() {
-        assert_eq!(p(r"(?i)\D"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        let bytes = asciid().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\D"), Expr::Class(bytes));
-    }
-
-    #[test]
-    fn escape_perl_s_case_fold_negate() {
-        assert_eq!(p(r"(?i)\S"),
-                   Expr::Class(class(PERLS).case_fold().negate()));
-        let bytes = asciis().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\S"), Expr::Class(bytes));
-    }
-
-    #[test]
-    fn escape_perl_w_case_fold_negate() {
-        assert_eq!(p(r"(?i)\W"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        let bytes = asciiw().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\W"), Expr::Class(bytes));
-    }
-
-    #[test]
-    fn class_singleton() {
-        assert_eq!(p(r"[a]"), Expr::Class(class(&[('a', 'a')])));
-        assert_eq!(p(r"[\x00]"), Expr::Class(class(&[('\x00', '\x00')])));
-        assert_eq!(p(r"[\n]"), Expr::Class(class(&[('\n', '\n')])));
-        assert_eq!(p("[\n]"), Expr::Class(class(&[('\n', '\n')])));
-
-        assert_eq!(pb(r"(?-u)[a]"), Expr::ClassBytes(bclass(&[(b'a', b'a')])));
-        assert_eq!(pb(r"(?-u)[\x00]"), Expr::ClassBytes(bclass(&[(0, 0)])));
-        assert_eq!(pb(r"(?-u)[\xFF]"),
-                   Expr::ClassBytes(bclass(&[(0xFF, 0xFF)])));
-        assert_eq!(pb("(?-u)[\n]"),
-                   Expr::ClassBytes(bclass(&[(b'\n', b'\n')])));
-        assert_eq!(pb(r"(?-u)[\n]"),
-                   Expr::ClassBytes(bclass(&[(b'\n', b'\n')])));
-    }
-
-    #[test]
-    fn class_singleton_negate() {
-        assert_eq!(p(r"[^a]"), Expr::Class(class(&[
-            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p(r"[^\x00]"), Expr::Class(class(&[
-            ('\x01', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p(r"[^\n]"), Expr::Class(class(&[
-            ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p("[^\n]"), Expr::Class(class(&[
-            ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
-        ])));
-
-        assert_eq!(pb(r"(?-u)[^a]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x60), (0x62, 0xFF),
-        ])));
-        assert_eq!(pb(r"(?-u)[^\x00]"), Expr::ClassBytes(bclass(&[
-            (0x01, 0xFF),
-        ])));
-        assert_eq!(pb(r"(?-u)[^\n]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x09), (0x0B, 0xFF),
-        ])));
-        assert_eq!(pb("(?-u)[^\n]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x09), (0x0B, 0xFF),
-        ])));
-    }
-
-    #[test]
-    fn class_singleton_class() {
-        assert_eq!(p(r"[\d]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\p{Yi}]"), Expr::Class(class(YI)));
-
-        let bytes = class(PERLD).to_byte_class();
-        assert_eq!(pb(r"(?-u)[\d]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate() {
-        assert_eq!(p(r"[^\d]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^\w]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[^\s]"), Expr::Class(class(PERLS).negate()));
-
-        let bytes = asciid_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\d]"), Expr::ClassBytes(bytes));
-        let bytes = asciiw_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\w]"), Expr::ClassBytes(bytes));
-        let bytes = asciis_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\s]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate_negate() {
-        assert_eq!(p(r"[^\D]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[^\W]"), Expr::Class(class(PERLW)));
-        assert_eq!(p(r"[^\S]"), Expr::Class(class(PERLS)));
-
-        assert_eq!(pb(r"(?-u)[^\D]"), Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[^\W]"), Expr::ClassBytes(asciiw_bytes()));
-        assert_eq!(pb(r"(?-u)[^\S]"), Expr::ClassBytes(asciis_bytes()));
-    }
-
-    #[test]
-    fn class_singleton_class_casei() {
-        assert_eq!(p(r"(?i)[\d]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[\p{Yi}]"), Expr::Class(class(YI).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[\d]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_singleton_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^\d]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^\w]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^\s]"),
-                   Expr::Class(class(PERLS).case_fold().negate()));
-
-        let bytes = asciid_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\d]"), Expr::ClassBytes(bytes));
-        let bytes = asciiw_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\w]"), Expr::ClassBytes(bytes));
-        let bytes = asciis_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\s]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate_negate_casei() {
-        assert_eq!(p(r"(?i)[^\D]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[^\W]"), Expr::Class(class(PERLW).case_fold()));
-        assert_eq!(p(r"(?i)[^\S]"), Expr::Class(class(PERLS).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[^\D]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^\W]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^\S]"),
-                   Expr::ClassBytes(asciis_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_multiple_class() {
-        assert_eq!(p(r"[\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ])));
-    }
-
-    #[test]
-    fn class_multiple_class_negate() {
-        assert_eq!(p(r"[^\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_negate() {
-        let nperlw = class(PERLW).negate();
-        let nyi = class(YI).negate();
-        let cls = CharClass::empty().merge(nperlw).merge(nyi);
-        assert_eq!(p(r"[^\W\P{Yi}]"), Expr::Class(cls.negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_casei() {
-        assert_eq!(p(r"(?i)[\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).case_fold()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).case_fold().negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_negate_casei() {
-        let nperlw = class(PERLW).negate();
-        let nyi = class(YI).negate();
-        let class = CharClass::empty().merge(nperlw).merge(nyi);
-        assert_eq!(p(r"(?i)[^\W\P{Yi}]"),
-                   Expr::Class(class.case_fold().negate()));
-    }
-
-    #[test]
-    fn class_class_hypen() {
-        assert_eq!(p(r"[\p{Yi}-]"), Expr::Class(classes(&[
-            &[('-', '-')], YI,
-        ])));
-        assert_eq!(p(r"[\p{Yi}-a]"), Expr::Class(classes(&[
-            &[('-', '-')], &[('a', 'a')], YI,
-        ])));
-    }
-
-    #[test]
-    fn class_brackets() {
-        assert_eq!(p(r"[]]"), Expr::Class(class(&[(']', ']')])));
-        assert_eq!(p(r"[]\[]"), Expr::Class(class(&[('[', '['), (']', ']')])));
-        assert_eq!(p(r"[\[]]"), Expr::Concat(vec![
-            Expr::Class(class(&[('[', '[')])),
-            lit(']'),
-        ]));
-    }
-
-    #[test]
-    fn class_brackets_hypen() {
-        assert_eq!(p("[]-]"), Expr::Class(class(&[('-', '-'), (']', ']')])));
-        assert_eq!(p("[-]]"), Expr::Concat(vec![
-            Expr::Class(class(&[('-', '-')])),
-            lit(']'),
-        ]));
-    }
-
-    #[test]
-    fn class_nested_class_union() {
-        assert_eq!(p(r"[c[a-b]]"), Expr::Class(class(&[('a', 'c')])));
-        assert_eq!(p(r"[[a-b]]"), Expr::Class(class(&[('a', 'b')])));
-        assert_eq!(p(r"[[c][a-b]]"), Expr::Class(class(&[('a', 'c')])));
-
-        assert_eq!(pb(r"(?-u)[c[a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'c')])));
-        assert_eq!(pb(r"(?-u)[[a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'b')])));
-        assert_eq!(pb(r"(?-u)[[c][a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'c')])));
-    }
-
-    #[test]
-    fn class_nested_class_union_casei() {
-        assert_eq!(p(r"(?i)[c[a-b]]"),
-                   Expr::Class(class(&[('a', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[[a-b]]"),
-                   Expr::Class(class(&[('a', 'b')]).case_fold()));
-        assert_eq!(p(r"(?i)[[c][a-b]]"),
-                   Expr::Class(class(&[('a', 'c')]).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_nested_class_negate() {
-        assert_eq!(p(r"[^[\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[[^\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[^\d]]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[^[\w]]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[[^\w]]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[^[^\w]]"), Expr::Class(class(PERLW)));
-        assert_eq!(p(r"[a-b[^c]]"),
-                   Expr::Class(class(&[('\u{0}', 'b'), ('d', '\u{10FFFF}')])));
-
-        assert_eq!(pb(r"(?-u)[^[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[^[\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes()));
-        assert_eq!(pb(r"(?-u)[a-b[^c]]"),
-                   Expr::ClassBytes(bclass(&[(b'\x00', b'b'), (b'd', b'\xFF')])))
-    }
-
-    #[test]
-    fn class_nested_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^[\d]]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[[^\d]]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\d]]"),
-                   Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[^[\w]]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[[^\w]]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\w]]"),
-                   Expr::Class(class(PERLW).case_fold()));
-        let mut cls = CharClass::empty().negate();
-        cls.remove('c');
-        cls.remove('C');
-        assert_eq!(p(r"(?i)[a-b[^c]]"), Expr::Class(cls));
-
-        assert_eq!(pb(r"(?i-u)[^[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^[\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold()));
-        let mut bytes = ByteClass::new(vec![]).negate();
-        bytes.remove(b'c');
-        bytes.remove(b'C');
-        assert_eq!(pb(r"(?i-u)[a-b[^c]]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_nested_class_brackets_hyphen() {
-        // This is confusing, but `]` is allowed if first character within a class
-        // It parses as a nested class with the `]` and `-` characters
-        assert_eq!(p(r"[[]-]]"), Expr::Class(class(&[('-', '-'), (']', ']')])));
-        assert_eq!(p(r"[[\[]]"), Expr::Class(class(&[('[', '[')])));
-        assert_eq!(p(r"[[\]]]"), Expr::Class(class(&[(']', ']')])));
-    }
-
-    #[test]
-    fn class_nested_class_deep_nesting() {
-        // Makes sure that implementation can handle deep nesting.
-        // With recursive parsing, this regex would blow the stack size.
-        use std::iter::repeat;
-        let nesting = 10_000;
-        let open: String = repeat("[").take(nesting).collect();
-        let close: String = repeat("]").take(nesting).collect();
-        let s  = format!("{}a{}", open, close);
-        assert_eq!(p(&s), Expr::Class(class(&[('a', 'a')])));
-    }
-
-    #[test]
-    fn class_intersection_ranges() {
-        assert_eq!(p(r"[abc&&b-c]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[abc&&[b-c]]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[[abc]&&[b-c]]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[a-z&&b-y&&c-x]"), Expr::Class(class(&[('c', 'x')])));
-        assert_eq!(p(r"[c-da-b&&a-d]"), Expr::Class(class(&[('a', 'd')])));
-        assert_eq!(p(r"[a-d&&c-da-b]"), Expr::Class(class(&[('a', 'd')])));
-
-        assert_eq!(pb(r"(?-u)[abc&&b-c]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[abc&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[[abc]&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[a-z&&b-y&&c-x]"),
-                   Expr::ClassBytes(bclass(&[(b'c', b'x')])));
-        assert_eq!(pb(r"(?-u)[c-da-b&&a-d]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'd')])));
-    }
-
-    #[test]
-    fn class_intersection_ranges_casei() {
-        assert_eq!(p(r"(?i)[abc&&b-c]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[abc&&[b-c]]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[[abc]&&[b-c]]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[a-z&&b-y&&c-x]"),
-                   Expr::Class(class(&[('c', 'x')]).case_fold()));
-        assert_eq!(p(r"(?i)[c-da-b&&a-d]"),
-                   Expr::Class(class(&[('a', 'd')]).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[abc&&b-c]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[abc&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[[abc]&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[a-z&&b-y&&c-x]"),
-                   Expr::ClassBytes(bclass(&[(b'c', b'x')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[c-da-b&&a-d]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'd')]).case_fold()));
-    }
-
-    #[test]
-    fn class_intersection_classes() {
-        assert_eq!(p(r"[\w&&\d]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\w&&[[:ascii:]]]"), Expr::Class(asciiw()));
-        assert_eq!(p(r"[\x00-\xFF&&\pZ]"),
-                   Expr::Class(class(&[('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}')])));
-
-        assert_eq!(pb(r"(?-u)[\w&&\d]"), Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[\w&&[[:ascii:]]]"), Expr::ClassBytes(asciiw_bytes()));
-    }
-
-    #[test]
-    fn class_intersection_classes_casei() {
-        assert_eq!(p(r"(?i)[\w&&\d]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[\w&&[[:ascii:]]]"), Expr::Class(asciiw().case_fold()));
-        assert_eq!(p(r"(?i)[\x00-\xFF&&\pZ]"),
-                   Expr::Class(class(&[('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}')])));
-
-        assert_eq!(pb(r"(?i-u)[\w&&\d]"), Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[\w&&[[:ascii:]]]"), Expr::ClassBytes(asciiw_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_intersection_negate() {
-        assert_eq!(p(r"[^\w&&\d]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[\w&&\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[^\w&&\d]]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\w&&[^\d]]"),
-                   Expr::Class(class(PERLW).intersection(&class(PERLD).negate())));
-        assert_eq!(p(r"[[^\w]&&[^\d]]"),
-                   Expr::Class(class(PERLW).negate()));
-
-        assert_eq!(pb(r"(?-u)[^\w&&\d]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[\w&&\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\w&&\d]]"),
-                   Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[\w&&[^\d]]"),
-                   Expr::ClassBytes(asciiw().intersection(&asciid().negate()).to_byte_class()));
-        assert_eq!(pb(r"(?-u)[[^\w]&&[^\d]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-    }
-
-    #[test]
-    fn class_intersection_negate_casei() {
-        assert_eq!(p(r"(?i)[^\w&&a-z]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[\w&&a-z]]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\w&&a-z]]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold()));
-        assert_eq!(p(r"(?i)[\w&&[^a-z]]"),
-                   Expr::Class(
-                       class(PERLW).intersection(&class(&[('a', 'z')])
-                       .case_fold().negate())));
-        assert_eq!(p(r"(?i)[[^\w]&&[^a-z]]"),
-                   Expr::Class(class(PERLW).negate()));
-
-        assert_eq!(pb(r"(?i-u)[^\w&&a-z]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[\w&&a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\w&&a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[\w&&[^a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'0', b'9'), (b'_', b'_')])));
-        assert_eq!(pb(r"(?i-u)[[^\w]&&[^a-z]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-    }
-
-    #[test]
-    fn class_intersection_caret() {
-        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
-        // `^` is also allowed to be unescaped after `&&`.
-        assert_eq!(p(r"[\^&&^]"), Expr::Class(class(&[('^', '^')])));
-    }
-
-    #[test]
-    fn class_intersection_brackets_hyphen() {
-        // `]` needs to be escaped after `&&` because it is not at the start of the class.
-        assert_eq!(p(r"[]&&\]]"), Expr::Class(class(&[(']', ']')])));
-
-        assert_eq!(p(r"[-&&-]"), Expr::Class(class(&[('-', '-')])));
-    }
-
-    #[test]
-    fn class_intersection_ampersand() {
-        // Unescaped `&` after `&&`
-        assert_eq!(p(r"[\&&&&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&&&\&]"), Expr::Class(class(&[('&', '&')])));
-    }
-
-    #[test]
-    fn class_intersection_precedence() {
-        assert_eq!(p(r"[a-w&&[^c-g]z]"), Expr::Class(class(&[('a', 'b'), ('h', 'w')])));
-    }
-
-    #[test]
-    fn class_special_escaped_set_chars() {
-        // These tests ensure that some special characters require escaping
-        // for use in character classes. The intention is to use these
-        // characters to implement sets as described in UTS#18 RL1.3. Once
-        // that's done, these tests should be removed and replaced with others.
-        assert_eq!(p(r"[\[]"), Expr::Class(class(&[('[', '[')])));
-        assert_eq!(p(r"[&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&\&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\x00-&]"), Expr::Class(class(&[('\u{0}', '&')])));
-        assert_eq!(p(r"[&-\xFF]"), Expr::Class(class(&[('&', '\u{FF}')])));
-
-        assert_eq!(p(r"[~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\~\~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\x00-~]"), Expr::Class(class(&[('\u{0}', '~')])));
-        assert_eq!(p(r"[~-\xFF]"), Expr::Class(class(&[('~', '\u{FF}')])));
-
-        assert_eq!(p(r"[+-\-]"), Expr::Class(class(&[('+', '-')])));
-        assert_eq!(p(r"[a-a\--\xFF]"), Expr::Class(class(&[
-            ('-', '\u{FF}'),
-        ])));
-    }
-
-    #[test]
-    fn class_overlapping() {
-        assert_eq!(p("[a-fd-h]"), Expr::Class(class(&[('a', 'h')])));
-        assert_eq!(p("[a-fg-m]"), Expr::Class(class(&[('a', 'm')])));
-
-        assert_eq!(pb("(?-u)[a-fd-h]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'h')])));
-        assert_eq!(pb("(?-u)[a-fg-m]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'm')])));
-    }
-
-    #[test]
-    fn ascii_classes() {
-        assert_eq!(p("[:blank:]"), Expr::Class(class(&[
-            (':', ':'), ('a', 'b'), ('k', 'l'), ('n', 'n'),
-        ])));
-        assert_eq!(p("[[:upper:]]"), Expr::Class(class(UPPER)));
-
-        assert_eq!(pb("(?-u)[[:upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_not() {
-        assert_eq!(p("[:abc:]"),
-                   Expr::Class(class(&[(':', ':'), ('a', 'c')])));
-        assert_eq!(pb("(?-u)[:abc:]"),
-                   Expr::ClassBytes(bclass(&[(b':', b':'), (b'a', b'c')])));
-    }
-
-    #[test]
-    fn ascii_classes_multiple() {
-        assert_eq!(p("[[:lower:][:upper:]]"),
-                   Expr::Class(classes(&[UPPER, LOWER])));
-
-        assert_eq!(pb("(?-u)[[:lower:][:upper:]]"),
-                   Expr::ClassBytes(classes(&[UPPER, LOWER]).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_negate() {
-        assert_eq!(p("[[:^upper:]]"), Expr::Class(class(UPPER).negate()));
-        assert_eq!(p("[^[:^upper:]]"), Expr::Class(class(UPPER)));
-
-        assert_eq!(pb("(?-u)[[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().negate()));
-        assert_eq!(pb("(?-u)[^[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_negate_multiple() {
-        let (nlower, nword) = (class(LOWER).negate(), class(WORD).negate());
-        let cls = CharClass::empty().merge(nlower).merge(nword);
-        assert_eq!(p("[[:^lower:][:^word:]]"), Expr::Class(cls.clone()));
-        assert_eq!(p("[^[:^lower:][:^word:]]"), Expr::Class(cls.negate()));
-    }
-
-    #[test]
-    fn ascii_classes_case_fold() {
-        assert_eq!(p("(?i)[[:upper:]]"),
-                   Expr::Class(class(UPPER).case_fold()));
-
-        assert_eq!(pb("(?i-u)[[:upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
-    }
-
-    #[test]
-    fn ascii_classes_negate_case_fold() {
-        assert_eq!(p("(?i)[[:^upper:]]"),
-                   Expr::Class(class(UPPER).case_fold().negate()));
-        assert_eq!(p("(?i)[^[:^upper:]]"),
-                   Expr::Class(class(UPPER).case_fold()));
-
-        assert_eq!(pb("(?i-u)[[:^upper:]]"),
-                   Expr::ClassBytes(
-                       class(UPPER).to_byte_class().case_fold().negate()));
-        assert_eq!(pb("(?i-u)[^[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
-    }
-
-    #[test]
-    fn single_class_negate_case_fold() {
-        assert_eq!(p("(?i)[^x]"),
-                   Expr::Class(class(&[('x', 'x')]).case_fold().negate()));
-
-        assert_eq!(pb("(?i-u)[^x]"),
-                   Expr::ClassBytes(
-                       class(&[('x', 'x')])
-                       .to_byte_class().case_fold().negate()));
-    }
-
-    #[test]
-    fn ignore_space_empty() {
-        assert_eq!(p("(?x) "), Expr::Empty);
-    }
-
-    #[test]
-    fn ignore_space_literal() {
-        assert_eq!(p("(?x) a b c"), Expr::Concat(vec![
-            lit('a'), lit('b'), lit('c'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_literal_off() {
-        assert_eq!(p("(?x) a b c(?-x) a"), Expr::Concat(vec![
-            lit('a'), lit('b'), lit('c'), lit(' '), lit('a'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_class() {
-        assert_eq!(p("(?x)[a
-        - z
-]"), Expr::Class(class(&[('a', 'z')])));
-        assert_eq!(p("(?x)[  ^   a
-        - z
-]"), Expr::Class(class(&[('a', 'z')]).negate()));
-    }
-
-    #[test]
-    fn ignore_space_escape_octal() {
-        assert_eq!(p(r"(?x)\12 3"), Expr::Concat(vec![
-            lit('\n'),
-            lit('3'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_hex() {
-        assert_eq!(p(r"(?x)\x { 53 }"), lit('S'));
-        assert_eq!(p(r"(?x)\x # comment
-{ # comment
-    53 # comment
-} # comment"), lit('S'));
-    }
-
-    #[test]
-    fn ignore_space_escape_hex2() {
-        assert_eq!(p(r"(?x)\x 53"), lit('S'));
-        assert_eq!(p(r"(?x)\x # comment
-        53 # comment"), lit('S'));
-    }
-
-    #[test]
-    fn ignore_space_escape_unicode_name() {
-        assert_eq!(p(r"(?x)\p # comment
-{ # comment
-    Yi # comment
-} # comment"), Expr::Class(class(YI)));
-    }
-
-    #[test]
-    fn ignore_space_repeat_counted() {
-        assert_eq!(p("(?x)a # comment
-{ # comment
-    5 # comment
-    , # comment
-    10 # comment
-}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn ignore_space_comments() {
-        assert_eq!(p(r"(?x)(?P<foo>
-    a # comment 1
-)(?P<bar>
-    z # comment 2
-)"), Expr::Concat(vec![
-        Expr::Group {
-            e: Box::new(lit('a')),
-            i: Some(1),
-            name: Some("foo".into()),
-        },
-        Expr::Group {
-            e: Box::new(lit('z')),
-            i: Some(2),
-            name: Some("bar".into()),
-        },
-    ]));
-    }
-
-    #[test]
-    fn ignore_space_comments_re_enable() {
-        assert_eq!(p(r"(?x)a # hi
-(?-x:#) # sweet"), Expr::Concat(vec![
-            lit('a'),
-            Expr::Group {
-                e: Box::new(lit('#')),
-                i: None,
-                name: None,
-            },
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_punctuation() {
-        assert_eq!(p(r"(?x)\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
-            lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
-            lit('('), lit(')'), lit('|'), lit('['), lit(']'),
-            lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_hash() {
-        assert_eq!(p(r"(?x)a\# # hi there"), Expr::Concat(vec![
-            lit('a'),
-            lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_space() {
-        assert_eq!(p(r"(?x)a\  # hi there"), Expr::Concat(vec![
-            lit('a'),
-            lit(' '),
-        ]));
-    }
-
-    // Test every single possible error case.
-
-    macro_rules! test_err {
-        ($re:expr, $pos:expr, $kind:expr) => {
-            test_err!($re, $pos, $kind, Flags::default());
-        };
-        ($re:expr, $pos:expr, $kind:expr, $flags:expr) => {{
-            let err = Parser::parse($re, $flags).unwrap_err();
-            assert_eq!($pos, err.pos);
-            assert_eq!($kind, err.kind);
-            assert!($re.contains(&err.surround));
-        }}
-    }
-
-    #[test]
-    fn invalid_utf8_not_allowed() {
-        // let flags = Flags { unicode: false, .. Flags::default() };
-        test_err!(r"(?-u)\xFF", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u).", 5, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)(?s).", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)[\x00-\x80]", 15, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)\222", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)\x{0080}", 13, ErrorKind::InvalidUtf8);
-    }
-
-    #[test]
-    fn unicode_char_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!("☃(?-u:☃)", 7, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_class_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"☃(?-u:\pL)", 9, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_class_literal_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[☃]", 6, ErrorKind::UnicodeNotAllowed, flags);
-        test_err!(r"(?-u)[☃-☃]", 6, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_hex_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)\x{FFFF}", 13, ErrorKind::UnicodeNotAllowed, flags);
-        test_err!(r"(?-u)\x{100}", 12, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_octal_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)\400", 9, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn error_repeat_no_expr_simple() {
-        test_err!("(*", 1, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_no_expr_counted() {
-        test_err!("({5}", 1, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_beginning_counted() {
-        test_err!("{5}", 0, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_illegal_exprs_simple() {
-        test_err!("a**", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        }));
-        test_err!("a|*", 2,
-            ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
-        );
-    }
-
-    #[test]
-    fn error_repeat_illegal_exprs_counted() {
-        test_err!("a*{5}", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        }));
-        test_err!("a|{5}", 2,
-            ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
-        );
-    }
-
-    #[test]
-    fn error_repeat_empty_number() {
-        test_err!("a{}", 2, ErrorKind::MissingBase10);
-    }
-
-    #[test]
-    fn error_repeat_eof() {
-        test_err!("a{5", 3, ErrorKind::UnclosedRepeat);
-    }
-
-    #[test]
-    fn error_repeat_empty_number_eof() {
-        test_err!("a{xyz", 5, ErrorKind::InvalidBase10("xyz".into()));
-        test_err!("a{12,xyz", 8, ErrorKind::InvalidBase10("xyz".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_number() {
-        test_err!("a{9999999999}", 12,
-                  ErrorKind::InvalidBase10("9999999999".into()));
-        test_err!("a{1,9999999999}", 14,
-                  ErrorKind::InvalidBase10("9999999999".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_number_extra() {
-        test_err!("a{12x}", 5, ErrorKind::InvalidBase10("12x".into()));
-        test_err!("a{1,12x}", 7, ErrorKind::InvalidBase10("12x".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_range() {
-        test_err!("a{2,1}", 5,
-                  ErrorKind::InvalidRepeatRange { min: 2, max: 1 });
-    }
-
-    #[test]
-    fn error_alternate_empty() {
-        test_err!("|a", 0, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_alternate_empty_with_group() {
-        test_err!("(|a)", 1, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_alternate_empty_with_alternate() {
-        test_err!("a||", 2, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_close_paren_unopened_empty() {
-        test_err!(")", 0, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unopened() {
-        test_err!("ab)", 2, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unopened_with_alt() {
-        test_err!("a|b)", 3, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unclosed_with_alt() {
-        test_err!("(a|b", 0, ErrorKind::UnclosedParen);
-    }
-
-    #[test]
-    fn error_close_paren_empty_alt() {
-        test_err!("(a|)", 3, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_close_paren_empty_group() {
-        test_err!("()", 1, ErrorKind::EmptyGroup);
-    }
-
-    #[test]
-    fn error_close_paren_empty_group_with_name() {
-        test_err!("(?P<foo>)", 8, ErrorKind::EmptyGroup);
-    }
-
-    #[test]
-    fn error_finish_concat_unclosed() {
-        test_err!("ab(xy", 2, ErrorKind::UnclosedParen);
-    }
-
-    #[test]
-    fn error_finish_concat_empty_alt() {
-        test_err!("a|", 2, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_group_name_invalid() {
-        test_err!("(?P<a#>x)", 6, ErrorKind::InvalidCaptureName("a#".into()));
-    }
-
-    #[test]
-    fn error_group_name_invalid_leading() {
-        test_err!("(?P<1a>a)", 6, ErrorKind::InvalidCaptureName("1a".into()));
-    }
-
-    #[test]
-    fn error_group_name_unexpected_eof() {
-        test_err!("(?P<a", 5, ErrorKind::UnclosedCaptureName("a".into()));
-    }
-
-    #[test]
-    fn error_group_name_empty() {
-        test_err!("(?P<>a)", 4, ErrorKind::EmptyCaptureName);
-    }
-
-    #[test]
-    fn error_group_opts_unrecognized_flag() {
-        test_err!("(?z:a)", 2, ErrorKind::UnrecognizedFlag('z'));
-    }
-
-    #[test]
-    fn error_group_opts_unexpected_eof() {
-        test_err!("(?i", 3, ErrorKind::UnexpectedFlagEof);
-    }
-
-    #[test]
-    fn error_group_opts_double_negation() {
-        test_err!("(?-i-s:a)", 4, ErrorKind::DoubleFlagNegation);
-    }
-
-    #[test]
-    fn error_group_opts_empty_negation() {
-        test_err!("(?i-:a)", 4, ErrorKind::EmptyFlagNegation);
-    }
-
-    #[test]
-    fn error_group_opts_empty() {
-        test_err!("(?)", 2, ErrorKind::EmptyFlagNegation);
-    }
-
-    #[test]
-    fn error_escape_unexpected_eof() {
-        test_err!(r"\", 1, ErrorKind::UnexpectedEscapeEof);
-    }
-
-    #[test]
-    fn error_escape_unrecognized() {
-        test_err!(r"\m", 1, ErrorKind::UnrecognizedEscape('m'));
-    }
-
-    #[test]
-    fn error_escape_hex2_eof0() {
-        test_err!(r"\x", 2, ErrorKind::UnexpectedTwoDigitHexEof);
-    }
-
-    #[test]
-    fn error_escape_hex2_eof1() {
-        test_err!(r"\xA", 3, ErrorKind::UnexpectedTwoDigitHexEof);
-    }
-
-    #[test]
-    fn error_escape_hex2_invalid() {
-        test_err!(r"\xAG", 4, ErrorKind::InvalidBase16("AG".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_eof0() {
-        test_err!(r"\x{", 3, ErrorKind::InvalidBase16("".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_eof1() {
-        test_err!(r"\x{A", 4, ErrorKind::UnclosedHex);
-    }
-
-    #[test]
-    fn error_escape_hex_invalid() {
-        test_err!(r"\x{AG}", 5, ErrorKind::InvalidBase16("AG".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_scalar_value_surrogate() {
-        test_err!(r"\x{D800}", 8, ErrorKind::InvalidScalarValue(0xD800));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_scalar_value_high() {
-        test_err!(r"\x{110000}", 10, ErrorKind::InvalidScalarValue(0x110000));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_u32() {
-        test_err!(r"\x{9999999999}", 13,
-                  ErrorKind::InvalidBase16("9999999999".into()));
-    }
-
-    #[test]
-    fn error_unicode_unclosed() {
-        test_err!(r"\p{", 3, ErrorKind::UnclosedUnicodeName);
-        test_err!(r"\p{Greek", 8, ErrorKind::UnclosedUnicodeName);
-    }
-
-    #[test]
-    fn error_unicode_no_letter() {
-        test_err!(r"\p", 2, ErrorKind::UnexpectedEscapeEof);
-    }
-
-    #[test]
-    fn error_unicode_unknown_letter() {
-        test_err!(r"\pA", 3, ErrorKind::UnrecognizedUnicodeClass("A".into()));
-    }
-
-    #[test]
-    fn error_unicode_unknown_name() {
-        test_err!(r"\p{Yii}", 7,
-                  ErrorKind::UnrecognizedUnicodeClass("Yii".into()));
-    }
-
-    #[test]
-    fn error_class_eof_empty() {
-        test_err!("[", 1, ErrorKind::UnexpectedClassEof);
-        test_err!("[^", 2, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_eof_non_empty() {
-        test_err!("[a", 2, ErrorKind::UnexpectedClassEof);
-        test_err!("[^a", 3, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_eof_range() {
-        test_err!("[a-", 3, ErrorKind::UnexpectedClassEof);
-        test_err!("[^a-", 4, ErrorKind::UnexpectedClassEof);
-        test_err!("[---", 4, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_invalid_escape() {
-        test_err!(r"[\pA]", 4,
-                  ErrorKind::UnrecognizedUnicodeClass("A".into()));
-    }
-
-    #[test]
-    fn error_class_valid_escape_not_allowed() {
-        test_err!(r"[\A]", 3, ErrorKind::InvalidClassEscape(Expr::StartText));
-    }
-
-    #[test]
-    fn error_class_range_valid_escape_not_allowed() {
-        test_err!(r"[a-\d]", 5,
-                  ErrorKind::InvalidClassEscape(Expr::Class(class(PERLD))));
-        test_err!(r"[a-\A]", 5,
-                  ErrorKind::InvalidClassEscape(Expr::StartText));
-        test_err!(r"[\A-a]", 3,
-                  ErrorKind::InvalidClassEscape(Expr::StartText));
-    }
-
-    #[test]
-    fn error_class_invalid_range() {
-        test_err!("[z-a]", 4, ErrorKind::InvalidClassRange {
-            start: 'z',
-            end: 'a',
-        });
-    }
-
-    #[test]
-    fn error_class_empty_range() {
-        test_err!("[]", 2, ErrorKind::UnexpectedClassEof);
-        test_err!("[^]", 3, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[^\d\D]", 7, ErrorKind::EmptyClass);
-
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[^\x00-\xFF]", 17, ErrorKind::EmptyClass, flags);
-    }
-
-    #[test]
-    fn error_class_unsupported_char() {
-        // These tests ensure that some unescaped special characters are
-        // rejected in character classes. The intention is to use these
-        // characters to implement sets as described in UTS#18 RL1.3. Once
-        // that's done, these tests should be removed and replaced with others.
-        test_err!("[~~]", 2, ErrorKind::UnsupportedClassChar('~'));
-        test_err!("[+--]", 4, ErrorKind::UnsupportedClassChar('-'));
-        test_err!(r"[a-a--\xFF]", 5, ErrorKind::UnsupportedClassChar('-'));
-        test_err!(r"[a&&~~]", 5, ErrorKind::UnsupportedClassChar('~'));
-        test_err!(r"[a&&--]", 5, ErrorKind::UnsupportedClassChar('-'));
-    }
-
-    #[test]
-    fn error_class_nested_class() {
-        test_err!(r"[[]]", 4, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[][]]", 6, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[^\d\D]]", 8, ErrorKind::EmptyClass);
-        test_err!(r"[[]", 3, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[^]", 4, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_intersection() {
-        test_err!(r"[&&]", 4, ErrorKind::EmptyClass);
-        test_err!(r"[a&&]", 5, ErrorKind::EmptyClass);
-        test_err!(r"[&&&&]", 6, ErrorKind::EmptyClass);
-        // `]` after `&&` is not the same as in (`[]]`), because it's also not
-        // allowed unescaped in `[a]]`.
-        test_err!(r"[]&&]]", 5, ErrorKind::EmptyClass);
-
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[a&&\pZ]", 12, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn error_duplicate_capture_name() {
-        test_err!("(?P<a>.)(?P<a>.)", 14,
-                  ErrorKind::DuplicateCaptureName("a".into()));
-    }
-
-    #[test]
-    fn error_ignore_space_escape_hex() {
-        test_err!(r"(?x)\x{ 5 3 }", 10, ErrorKind::UnclosedHex);
-    }
-
-    #[test]
-    fn error_ignore_space_escape_hex2() {
-        test_err!(r"(?x)\x 5 3", 9, ErrorKind::InvalidBase16("5 ".into()));
-    }
-
-    #[test]
-    fn error_ignore_space_escape_unicode_name() {
-        test_err!(r"(?x)\p{Y i}", 9, ErrorKind::UnclosedUnicodeName);
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with the `parse` method. The parse method returns
+    /// a high level intermediate representation of the given regular
+    /// expression.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into a high level intermediate
+    /// representation, returning an error if parsing or translation fails.
+    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+        let ast = try!(self.ast.parse(pattern));
+        let hir = try!(self.hir.translate(pattern, &ast));
+        Ok(hir)
     }
 }
diff --git a/regex-syntax/src/properties.rs b/regex-syntax/src/properties.rs
deleted file mode 100644
index c1da573721..0000000000
--- a/regex-syntax/src/properties.rs
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use quickcheck::{Arbitrary, Gen, Testable, QuickCheck, StdGen};
-
-use {
-    Expr, ExprBuilder,
-    CharClass, ClassRange, ByteClass, ByteRange, Repeater, dec_char,
-};
-
-fn qc<T: Testable>(t: T) {
-    QuickCheck::new()
-        .tests(10_000)
-        .max_tests(20_000)
-        .quickcheck(t);
-}
-
-fn class(ranges: &[(char, char)]) -> CharClass {
-    let ranges = ranges.iter().cloned()
-                       .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
-    CharClass::new(ranges)
-}
-
-// Test invariants for canonicalizing character classes.
-
-#[test]
-fn negate() {
-    fn prop(ranges: Vec<(char, char)>) -> bool {
-        let expected = class(&ranges).canonicalize();
-        let got = class(&ranges).negate().negate();
-        expected == got
-    }
-    qc(prop as fn(Vec<(char, char)>) -> bool);
-}
-
-#[test]
-fn classes_are_sorted_and_nonoverlapping() {
-    fn prop(ranges: Vec<(char, char)>) -> bool {
-        class(&ranges)
-            .canonicalize()
-            .windows(2)
-            .all(|w| w[0].end < dec_char(w[1].start))
-    }
-    qc(prop as fn(Vec<(char, char)>) -> bool);
-}
-
-#[test]
-fn valid_class_ranges() {
-    fn prop(ranges: Vec<(char, char)>) -> bool {
-        class(&ranges).canonicalize().into_iter().all(|r| r.start <= r.end)
-    }
-    qc(prop as fn(Vec<(char, char)>) -> bool);
-}
-
-#[test]
-fn intersection() {
-    fn prop(ranges1: Vec<(char, char)>, ranges2: Vec<(char, char)>) -> bool {
-        let class1 = class(&ranges1).canonicalize();
-        let class2 = class(&ranges2).canonicalize();
-
-        let mut expected = CharClass::empty();
-        // This is inefficient but correct.
-        for range1 in &class1 {
-            for range2 in &class2 {
-                if let Some(intersection) = range1.intersection(range2) {
-                    expected.ranges.push(intersection);
-                }
-            }
-        }
-        expected = expected.canonicalize();
-
-        let got = class1.intersection(&class2);
-        expected == got
-    }
-    qc(prop as fn(Vec<(char, char)>, Vec<(char, char)>) -> bool);
-}
-
-/// A wrapper type for generating "regex-like" Unicode strings.
-///
-/// In particular, this type's `Arbitrary` impl specifically biases toward
-/// special regex characters to make test cases more interesting.
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct RegexLikeString(String);
-
-impl Arbitrary for RegexLikeString {
-    fn arbitrary<G: Gen>(g: &mut G) -> RegexLikeString {
-        const SPECIAL: &'static [char] = &[
-            '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}',
-            '^', '$',
-        ];
-        // Generating random Unicode strings results in mostly uninteresting
-        // regexes. Namely, they'll mostly just be literals.
-        // To make properties using regex strings more interesting, we bias
-        // toward selecting characters of significance to a regex.
-        let size = { let s = g.size(); g.gen_range(0, s) };
-        RegexLikeString((0..size).map(|_| {
-            if g.gen_weighted_bool(3) {
-                *g.choose(SPECIAL).unwrap()
-            } else {
-                g.gen()
-            }
-        }).collect())
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=RegexLikeString>> {
-        // The regular `String` shrinker is good enough.
-        Box::new(self.0.shrink().map(RegexLikeString))
-    }
-}
-
-/// A special type for generating small non-zero sized ASCII strings.
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct SmallAscii(String);
-
-impl Arbitrary for SmallAscii {
-    fn arbitrary<G: Gen>(g: &mut G) -> SmallAscii {
-        use std::char::from_u32;
-        let size = g.gen_range(1, 5);
-        SmallAscii((0..size)
-                   .map(|_| from_u32(g.gen_range(97, 123)).unwrap())
-                   .collect())
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=SmallAscii>> {
-        Box::new(self.0.shrink().map(SmallAscii))
-    }
-}
-
-#[test]
-fn parser_never_panics() {
-    fn prop(s: RegexLikeString) -> bool {
-        let _ = Expr::parse(&s.0); true
-    }
-    qc(prop as fn(RegexLikeString) -> bool);
-}
-
-// Testing entire expressions.
-//
-// We only have one test at the moment, but the machinery could be useful
-// for other things.
-//
-// In particular, Russ Cox writes about testing regexes by comparing the
-// strings they match with other regex implementations. A fuzzer/shrinker
-// (which is what's implemented below) would be a great way to drive that
-// process. ---AG
-
-impl Arbitrary for Expr {
-    fn arbitrary<G: Gen>(g: &mut G) -> Expr {
-        let e = fix_capture_indices(gen_expr(g, 0, ExprType::Anything));
-        e.simplify(200).unwrap()
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=Expr>> {
-        use Expr::*;
-
-        let nada = || Box::new(None.into_iter());
-        let es: Box<Iterator<Item=Expr>> = match *self {
-            Empty | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
-            | StartLine | EndLine | StartText | EndText
-            | WordBoundary | NotWordBoundary
-            | WordBoundaryAscii | NotWordBoundaryAscii => nada(),
-            Literal { ref chars, .. } if chars.len() == 1 => nada(),
-            Literal { ref chars, casei } => {
-                Box::new((chars.clone(), casei)
-                         .shrink()
-                         .filter(|&(ref chars, _)| chars.len() > 0)
-                         .map(|(chars, casei)| {
-                             Literal { chars: chars, casei: casei }
-                         }))
-            }
-            LiteralBytes { ref bytes, .. } if bytes.len() == 1 => nada(),
-            LiteralBytes { ref bytes, casei } => {
-                Box::new((bytes.clone(), casei)
-                         .shrink()
-                         .filter(|&(ref bytes, _)| bytes.len() > 0)
-                         .map(|(bytes, casei)| {
-                             LiteralBytes { bytes: bytes, casei: casei }
-                         }))
-            }
-            Class(ref cls) => Box::new(cls.shrink().map(Class)),
-            ClassBytes(ref cls) => Box::new(cls.shrink().map(ClassBytes)),
-            Group { ref e, ref i, ref name } => {
-                let (i, name) = (i.clone(), name.clone());
-                Box::new((*e.clone()).shrink()
-                          .chain((*e.clone()).shrink()
-                                  .map(move |e| Group {
-                                      e: Box::new(e),
-                                      i: i.clone(),
-                                      name: name.clone(),
-                                  })))
-            }
-            Repeat { ref e, ref r, greedy } => {
-                Box::new((*e.clone(), r.clone())
-                         .shrink()
-                         .filter(|&(ref e, _)| e.can_repeat())
-                         .map(move |(e, r)| Repeat {
-                             e: Box::new(e),
-                             r: r,
-                             greedy: greedy,
-                         }))
-            }
-            // Concat(ref es) if es.len() <= 2 => nada(),
-            Concat(ref es) => {
-                Box::new(es.clone()
-                           .shrink()
-                           .filter(|es| es.len() > 0)
-                           .map(|mut es| if es.len() == 1 {
-                               es.pop().unwrap()
-                           } else {
-                               Concat(es)
-                           }))
-            }
-            // Alternate(ref es) if es.len() <= 2 => nada(),
-            Alternate(ref es) => {
-                Box::new(es.clone()
-                           .shrink()
-                           .filter(|es| es.len() > 0)
-                           .map(|mut es| if es.len() == 1 {
-                               es.pop().unwrap()
-                           } else {
-                               Alternate(es)
-                           }))
-            }
-        };
-        Box::new(es.map(|e| fix_capture_indices(e).simplify(200).unwrap()))
-    }
-}
-
-enum ExprType {
-    NoSequences, // disallow concat/alternate
-    Anything,
-}
-
-fn gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr {
-    use Expr::*;
-    let ub = match (depth as usize >= g.size(), ty) {
-        (true, _) => 16,
-        (false, ExprType::NoSequences) => 18,
-        (false, ExprType::Anything) => 20,
-    };
-    match g.gen_range(1, ub) {
-        0 => Empty,
-        1 => Literal {
-            chars: SmallAscii::arbitrary(g).0.chars().collect(),
-            casei: g.gen(),
-        },
-        2 => LiteralBytes {
-            bytes: SmallAscii::arbitrary(g).0.as_bytes().to_owned(),
-            casei: g.gen(),
-        },
-        3 => AnyChar,
-        4 => AnyCharNoNL,
-        5 => AnyByte,
-        6 => AnyByteNoNL,
-        7 => Class(CharClass::arbitrary(g)),
-        8 => StartLine,
-        9 => EndLine,
-        10 => StartText,
-        11 => EndText,
-        12 => WordBoundary,
-        13 => NotWordBoundary,
-        14 => WordBoundaryAscii,
-        15 => NotWordBoundaryAscii,
-        16 => gen_group_expr(g, depth + 1),
-        17 => Repeat {
-            e: Box::new(gen_repeatable_expr(g, depth + 1)),
-            r: Repeater::arbitrary(g),
-            greedy: bool::arbitrary(g),
-        },
-        18 => {
-            let size = { let s = g.size(); g.gen_range(2, s) };
-            Concat((0..size)
-                   .map(|_| {
-                       gen_expr(g, depth + 1, ExprType::NoSequences)
-                    })
-                   .collect())
-        }
-        19 => {
-            let size = { let s = g.size(); g.gen_range(2, s) };
-            Alternate((0..size)
-                      .map(|_| {
-                          gen_expr(g, depth + 1, ExprType::NoSequences)
-                      })
-                      .collect())
-        }
-        _ => unreachable!()
-    }
-}
-
-fn gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
-    use Expr::*;
-    match g.gen_range(1, 10) {
-        0 => Empty,
-        1 => Literal {
-            chars: vec![Arbitrary::arbitrary(g)],
-            casei: g.gen(),
-        },
-        2 => LiteralBytes {
-            bytes: vec![Arbitrary::arbitrary(g)],
-            casei: g.gen(),
-        },
-        3 => AnyChar,
-        4 => AnyCharNoNL,
-        5 => AnyByte,
-        6 => AnyByteNoNL,
-        7 => Class(CharClass::arbitrary(g)),
-        8 => ClassBytes(ByteClass::arbitrary(g)),
-        9 => gen_group_expr(g, depth + 1),
-        _ => unreachable!(),
-    }
-}
-
-fn gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
-    let (i, name) = if g.gen() {
-        (None, None)
-    } else {
-        (Some(0), if g.gen() {
-            Some(SmallAscii::arbitrary(g).0)
-        } else {
-            None
-        })
-    };
-    Expr::Group {
-        e: Box::new(gen_expr(g, depth + 1, ExprType::Anything)),
-        i: i,
-        name: name,
-    }
-}
-
-fn fix_capture_indices(e: Expr) -> Expr {
-    fn bx(e: Expr) -> Box<Expr> { Box::new(e) }
-    fn fix(e: Expr, capi: &mut usize, names: &mut Vec<String>) -> Expr {
-        use Expr::*;
-        match e {
-            Group { e, i: Some(_), mut name } => {
-                *capi += 1;
-                let i = *capi;
-                let mut dupe_name = false;
-                if let Some(ref n1) = name {
-                    if names.iter().any(|n2| n1 == n2) {
-                        dupe_name = true;
-                    } else {
-                        names.push(n1.clone());
-                    }
-                }
-                if dupe_name { name = None; }
-                Group { e: bx(fix(*e, capi, names)), i: Some(i), name: name }
-            }
-            Group { e, i, name } => {
-                Group { e: bx(fix(*e, capi, names)), i: i, name: name }
-            }
-            Repeat { e, r, greedy } => {
-                Repeat { e: bx(fix(*e, capi, names)), r: r, greedy: greedy }
-            }
-            Concat(es) =>
-                Concat(es.into_iter().map(|e| fix(e, capi, names)).collect()),
-            Alternate(es) =>
-                Alternate(es.into_iter().map(|e| fix(e, capi, names)).collect()),
-            e => e,
-        }
-    }
-    fix(e, &mut 0, &mut vec![])
-}
-
-impl Arbitrary for Repeater {
-    fn arbitrary<G: Gen>(g: &mut G) -> Repeater {
-        use Repeater::*;
-        match g.gen_range(0, 4) {
-            0 => ZeroOrOne,
-            1 => ZeroOrMore,
-            2 => OneOrMore,
-            3 => {
-                use std::cmp::{max, min};
-                let n1 = Arbitrary::arbitrary(g);
-                let n2 = Arbitrary::arbitrary(g);
-                Range {
-                    min: min(n1, n2),
-                    max: if g.gen() { None } else { Some(max(n1, n2)) },
-                }
-            },
-            _ => unreachable!(),
-        }
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=Repeater>> {
-        use Repeater::*;
-        match *self {
-            ZeroOrOne | ZeroOrMore | OneOrMore => Box::new(None.into_iter()),
-            Range { min, max } => {
-                Box::new((min, max)
-                         .shrink()
-                         .map(|(min, max)| Range { min: min, max: max }))
-            }
-        }
-    }
-}
-
-impl Arbitrary for CharClass {
-    fn arbitrary<G: Gen>(g: &mut G) -> CharClass {
-        let mut ranges: Vec<ClassRange> = Arbitrary::arbitrary(g);
-        if ranges.is_empty() {
-            ranges.push(Arbitrary::arbitrary(g));
-        }
-        let cls = CharClass { ranges: ranges }.canonicalize();
-        if g.gen() { cls.case_fold() } else { cls }
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=CharClass>> {
-        Box::new(self.ranges.clone()
-                 .shrink()
-                 .filter(|ranges| ranges.len() > 0)
-                 .map(|ranges| CharClass { ranges: ranges }.canonicalize()))
-    }
-}
-
-impl Arbitrary for ClassRange {
-    fn arbitrary<G: Gen>(g: &mut G) -> ClassRange {
-        use std::char::from_u32;
-        ClassRange::new(
-            from_u32(g.gen_range(97, 123)).unwrap(),
-            from_u32(g.gen_range(97, 123)).unwrap(),
-        )
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=ClassRange>> {
-        Box::new((self.start, self.end)
-                 .shrink().map(|(s, e)| ClassRange::new(s, e)))
-    }
-}
-
-impl Arbitrary for ByteClass {
-    fn arbitrary<G: Gen>(g: &mut G) -> ByteClass {
-        let mut ranges: Vec<ByteRange> = Arbitrary::arbitrary(g);
-        if ranges.is_empty() {
-            ranges.push(Arbitrary::arbitrary(g));
-        }
-        let cls = ByteClass { ranges: ranges }.canonicalize();
-        if g.gen() { cls.case_fold() } else { cls }
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=ByteClass>> {
-        Box::new(self.ranges.clone()
-                 .shrink()
-                 .filter(|ranges| ranges.len() > 0)
-                 .map(|ranges| ByteClass { ranges: ranges }.canonicalize()))
-    }
-}
-
-impl Arbitrary for ByteRange {
-    fn arbitrary<G: Gen>(g: &mut G) -> ByteRange {
-        ByteRange::new(g.gen_range(97, 123), g.gen_range(97, 123))
-    }
-
-    fn shrink(&self) -> Box<Iterator<Item=ByteRange>> {
-        Box::new((self.start, self.end)
-                 .shrink().map(|(s, e)| ByteRange::new(s, e)))
-    }
-}
-
-#[test]
-fn display_regex_roundtrips() {
-    // Given an AST, if we print it as a regex and then re-parse it, do we
-    // get back the same AST?
-    // A lot of this relies crucially on regex simplification. So this is
-    // testing `Expr::simplify` as much as it is testing the `Display` impl.
-    fn prop(e: Expr) -> bool {
-        let parser = ExprBuilder::new().allow_bytes(true);
-        e == parser.parse(&e.to_string()).unwrap()
-    }
-    QuickCheck::new()
-        .tests(10_000)
-        .max_tests(20_000)
-        .gen(StdGen::new(::rand::thread_rng(), 50))
-        .quickcheck(prop as fn(Expr) -> bool);
-}
diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs
index f9a52fe65e..e76b203fad 100644
--- a/regex-syntax/src/unicode.rs
+++ b/regex-syntax/src/unicode.rs
@@ -1,6018 +1,462 @@
-// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// NOTE: The following code was generated by "scripts/unicode.py", do not edit
-// directly
-
-#![allow(warnings)]
-pub mod general_category {
-    pub const C_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'),
-        ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
-        ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
-        ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
-        ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
-        ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{605}'), ('\u{61c}', '\u{61d}'),
-        ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'),
-        ('\u{7b2}', '\u{7bf}'), ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'),
-        ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'),
-        ('\u{86b}', '\u{89f}'), ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'),
-        ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'),
-        ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'), ('\u{9b1}', '\u{9b1}'),
-        ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'), ('\u{9c5}', '\u{9c6}'),
-        ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'), ('\u{9d8}', '\u{9db}'),
-        ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'), ('\u{9fe}', '\u{a00}'),
-        ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'), ('\u{a11}', '\u{a12}'),
-        ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'), ('\u{a34}', '\u{a34}'),
-        ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'), ('\u{a3d}', '\u{a3d}'),
-        ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'), ('\u{a4e}', '\u{a50}'),
-        ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'), ('\u{a5f}', '\u{a65}'),
-        ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'), ('\u{a8e}', '\u{a8e}'),
-        ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'), ('\u{ab1}', '\u{ab1}'),
-        ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'), ('\u{ac6}', '\u{ac6}'),
-        ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'), ('\u{ad1}', '\u{adf}'),
-        ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'), ('\u{b00}', '\u{b00}'),
-        ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'), ('\u{b11}', '\u{b12}'),
-        ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'), ('\u{b34}', '\u{b34}'),
-        ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'), ('\u{b49}', '\u{b4a}'),
-        ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'), ('\u{b5e}', '\u{b5e}'),
-        ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'), ('\u{b84}', '\u{b84}'),
-        ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'), ('\u{b96}', '\u{b98}'),
-        ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'), ('\u{ba0}', '\u{ba2}'),
-        ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'), ('\u{bba}', '\u{bbd}'),
-        ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'), ('\u{bce}', '\u{bcf}'),
-        ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'), ('\u{bfb}', '\u{bff}'),
-        ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'),
-        ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'), ('\u{c45}', '\u{c45}'),
-        ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'),
-        ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c77}'),
-        ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'), ('\u{c91}', '\u{c91}'),
-        ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'), ('\u{cba}', '\u{cbb}'),
-        ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'),
-        ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'),
-        ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'), ('\u{d04}', '\u{d04}'),
-        ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'),
-        ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'), ('\u{d64}', '\u{d65}'),
-        ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'), ('\u{d97}', '\u{d99}'),
-        ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'), ('\u{dbe}', '\u{dbf}'),
-        ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'), ('\u{dd5}', '\u{dd5}'),
-        ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'), ('\u{df0}', '\u{df1}'),
-        ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'), ('\u{e5c}', '\u{e80}'),
-        ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'), ('\u{e89}', '\u{e89}'),
-        ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'), ('\u{e98}', '\u{e98}'),
-        ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'), ('\u{ea6}', '\u{ea6}'),
-        ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'), ('\u{eba}', '\u{eba}'),
-        ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'),
-        ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'),
-        ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'), ('\u{f98}', '\u{f98}'),
-        ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'), ('\u{fdb}', '\u{fff}'),
-        ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'), ('\u{10ce}',
-        '\u{10cf}'), ('\u{1249}', '\u{1249}'), ('\u{124e}', '\u{124f}'),
-        ('\u{1257}', '\u{1257}'), ('\u{1259}', '\u{1259}'), ('\u{125e}',
-        '\u{125f}'), ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
-        ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'), ('\u{12bf}',
-        '\u{12bf}'), ('\u{12c1}', '\u{12c1}'), ('\u{12c6}', '\u{12c7}'),
-        ('\u{12d7}', '\u{12d7}'), ('\u{1311}', '\u{1311}'), ('\u{1316}',
-        '\u{1317}'), ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
-        ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'), ('\u{13fe}',
-        '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'),
-        ('\u{170d}', '\u{170d}'), ('\u{1715}', '\u{171f}'), ('\u{1737}',
-        '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
-        ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'), ('\u{17de}',
-        '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'),
-        ('\u{180e}', '\u{180f}'), ('\u{181a}', '\u{181f}'), ('\u{1878}',
-        '\u{187f}'), ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
-        ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'), ('\u{193c}',
-        '\u{193f}'), ('\u{1941}', '\u{1943}'), ('\u{196e}', '\u{196f}'),
-        ('\u{1975}', '\u{197f}'), ('\u{19ac}', '\u{19af}'), ('\u{19ca}',
-        '\u{19cf}'), ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
-        ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'), ('\u{1a8a}',
-        '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'),
-        ('\u{1abf}', '\u{1aff}'), ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}',
-        '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
-        ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}', '\u{1cbf}'), ('\u{1cc8}',
-        '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'), ('\u{1dfa}', '\u{1dfa}'),
-        ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}',
-        '\u{1f47}'), ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'),
-        ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'), ('\u{1f5e}',
-        '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'), ('\u{1fb5}', '\u{1fb5}'),
-        ('\u{1fc5}', '\u{1fc5}'), ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}',
-        '\u{1fdc}'), ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'),
-        ('\u{1fff}', '\u{1fff}'), ('\u{200b}', '\u{200f}'), ('\u{202a}',
-        '\u{202e}'), ('\u{2060}', '\u{206f}'), ('\u{2072}', '\u{2073}'),
-        ('\u{208f}', '\u{208f}'), ('\u{209d}', '\u{209f}'), ('\u{20c0}',
-        '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'),
-        ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}',
-        '\u{2b75}'), ('\u{2b96}', '\u{2b97}'), ('\u{2bba}', '\u{2bbc}'),
-        ('\u{2bc9}', '\u{2bc9}'), ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}',
-        '\u{2bff}'), ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
-        ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}',
-        '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'), ('\u{2d68}', '\u{2d6e}'),
-        ('\u{2d71}', '\u{2d7e}'), ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}',
-        '\u{2da7}'), ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
-        ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'), ('\u{2dcf}',
-        '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'),
-        ('\u{2e4a}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}',
-        '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
-        ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}',
-        '\u{3104}'), ('\u{312f}', '\u{3130}'), ('\u{318f}', '\u{318f}'),
-        ('\u{31bb}', '\u{31bf}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}',
-        '\u{321f}'), ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
-        ('\u{9feb}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}',
-        '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'),
-        ('\u{a7af}', '\u{a7af}'), ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}',
-        '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
-        ('\u{a8c6}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'), ('\u{a8fe}',
-        '\u{a8ff}'), ('\u{a954}', '\u{a95e}'), ('\u{a97d}', '\u{a97f}'),
-        ('\u{a9ce}', '\u{a9ce}'), ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}',
-        '\u{a9ff}'), ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
-        ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'), ('\u{aaf7}',
-        '\u{ab00}'), ('\u{ab07}', '\u{ab08}'), ('\u{ab0f}', '\u{ab10}'),
-        ('\u{ab17}', '\u{ab1f}'), ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}',
-        '\u{ab2f}'), ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
-        ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'), ('\u{d7c7}',
-        '\u{d7ca}'), ('\u{d7fc}', '\u{d7ff}'), ('\u{e000}', '\u{f8ff}'),
-        ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'), ('\u{fb07}',
-        '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'), ('\u{fb37}', '\u{fb37}'),
-        ('\u{fb3d}', '\u{fb3d}'), ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}',
-        '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'),
-        ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'), ('\u{fdc8}',
-        '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'), ('\u{fe1a}', '\u{fe1f}'),
-        ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}',
-        '\u{fe6f}'), ('\u{fe75}', '\u{fe75}'), ('\u{fefd}', '\u{ff00}'),
-        ('\u{ffbf}', '\u{ffc1}'), ('\u{ffc8}', '\u{ffc9}'), ('\u{ffd0}',
-        '\u{ffd1}'), ('\u{ffd8}', '\u{ffd9}'), ('\u{ffdd}', '\u{ffdf}'),
-        ('\u{ffe7}', '\u{ffe7}'), ('\u{ffef}', '\u{fffb}'), ('\u{fffe}',
-        '\u{ffff}'), ('\u{1000c}', '\u{1000c}'), ('\u{10027}', '\u{10027}'),
-        ('\u{1003b}', '\u{1003b}'), ('\u{1003e}', '\u{1003e}'), ('\u{1004e}',
-        '\u{1004f}'), ('\u{1005e}', '\u{1007f}'), ('\u{100fb}', '\u{100ff}'),
-        ('\u{10103}', '\u{10106}'), ('\u{10134}', '\u{10136}'), ('\u{1018f}',
-        '\u{1018f}'), ('\u{1019c}', '\u{1019f}'), ('\u{101a1}', '\u{101cf}'),
-        ('\u{101fe}', '\u{1027f}'), ('\u{1029d}', '\u{1029f}'), ('\u{102d1}',
-        '\u{102df}'), ('\u{102fc}', '\u{102ff}'), ('\u{10324}', '\u{1032c}'),
-        ('\u{1034b}', '\u{1034f}'), ('\u{1037b}', '\u{1037f}'), ('\u{1039e}',
-        '\u{1039e}'), ('\u{103c4}', '\u{103c7}'), ('\u{103d6}', '\u{103ff}'),
-        ('\u{1049e}', '\u{1049f}'), ('\u{104aa}', '\u{104af}'), ('\u{104d4}',
-        '\u{104d7}'), ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'),
-        ('\u{10564}', '\u{1056e}'), ('\u{10570}', '\u{105ff}'), ('\u{10737}',
-        '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{107ff}'),
-        ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'), ('\u{10836}',
-        '\u{10836}'), ('\u{10839}', '\u{1083b}'), ('\u{1083d}', '\u{1083e}'),
-        ('\u{10856}', '\u{10856}'), ('\u{1089f}', '\u{108a6}'), ('\u{108b0}',
-        '\u{108df}'), ('\u{108f3}', '\u{108f3}'), ('\u{108f6}', '\u{108fa}'),
-        ('\u{1091c}', '\u{1091e}'), ('\u{1093a}', '\u{1093e}'), ('\u{10940}',
-        '\u{1097f}'), ('\u{109b8}', '\u{109bb}'), ('\u{109d0}', '\u{109d1}'),
-        ('\u{10a04}', '\u{10a04}'), ('\u{10a07}', '\u{10a0b}'), ('\u{10a14}',
-        '\u{10a14}'), ('\u{10a18}', '\u{10a18}'), ('\u{10a34}', '\u{10a37}'),
-        ('\u{10a3b}', '\u{10a3e}'), ('\u{10a48}', '\u{10a4f}'), ('\u{10a59}',
-        '\u{10a5f}'), ('\u{10aa0}', '\u{10abf}'), ('\u{10ae7}', '\u{10aea}'),
-        ('\u{10af7}', '\u{10aff}'), ('\u{10b36}', '\u{10b38}'), ('\u{10b56}',
-        '\u{10b57}'), ('\u{10b73}', '\u{10b77}'), ('\u{10b92}', '\u{10b98}'),
-        ('\u{10b9d}', '\u{10ba8}'), ('\u{10bb0}', '\u{10bff}'), ('\u{10c49}',
-        '\u{10c7f}'), ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'),
-        ('\u{10d00}', '\u{10e5f}'), ('\u{10e7f}', '\u{10fff}'), ('\u{1104e}',
-        '\u{11051}'), ('\u{11070}', '\u{1107e}'), ('\u{110bd}', '\u{110bd}'),
-        ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}',
-        '\u{110ff}'), ('\u{11135}', '\u{11135}'), ('\u{11144}', '\u{1114f}'),
-        ('\u{11177}', '\u{1117f}'), ('\u{111ce}', '\u{111cf}'), ('\u{111e0}',
-        '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'),
-        ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}',
-        '\u{11289}'), ('\u{1128e}', '\u{1128e}'), ('\u{1129e}', '\u{1129e}'),
-        ('\u{112aa}', '\u{112af}'), ('\u{112eb}', '\u{112ef}'), ('\u{112fa}',
-        '\u{112ff}'), ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'),
-        ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'), ('\u{11331}',
-        '\u{11331}'), ('\u{11334}', '\u{11334}'), ('\u{1133a}', '\u{1133b}'),
-        ('\u{11345}', '\u{11346}'), ('\u{11349}', '\u{1134a}'), ('\u{1134e}',
-        '\u{1134f}'), ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'),
-        ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'), ('\u{11375}',
-        '\u{113ff}'), ('\u{1145a}', '\u{1145a}'), ('\u{1145c}', '\u{1145c}'),
-        ('\u{1145e}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'), ('\u{114da}',
-        '\u{1157f}'), ('\u{115b6}', '\u{115b7}'), ('\u{115de}', '\u{115ff}'),
-        ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'), ('\u{1166d}',
-        '\u{1167f}'), ('\u{116b8}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'),
-        ('\u{1171a}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11740}',
-        '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{119ff}'),
-        ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'), ('\u{11a9d}',
-        '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'), ('\u{11af9}', '\u{11bff}'),
-        ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}',
-        '\u{11c4f}'), ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}', '\u{11c91}'),
-        ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'), ('\u{11d07}',
-        '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'), ('\u{11d37}', '\u{11d39}'),
-        ('\u{11d3b}', '\u{11d3b}'), ('\u{11d3e}', '\u{11d3e}'), ('\u{11d48}',
-        '\u{11d4f}'), ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
-        ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), ('\u{12544}',
-        '\u{12fff}'), ('\u{1342f}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'),
-        ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}',
-        '\u{16a6d}'), ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
-        ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), ('\u{16b5a}',
-        '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'),
-        ('\u{16b90}', '\u{16eff}'), ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}',
-        '\u{16f8e}'), ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}', '\u{16fff}'),
-        ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'), ('\u{1b11f}',
-        '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'),
-        ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}',
-        '\u{1bc9b}'), ('\u{1bca0}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
-        ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'), ('\u{1d1e9}',
-        '\u{1d1ff}'), ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'),
-        ('\u{1d372}', '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}',
-        '\u{1d49d}'), ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'),
-        ('\u{1d4a7}', '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}',
-        '\u{1d4ba}'), ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'),
-        ('\u{1d506}', '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}',
-        '\u{1d515}'), ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'),
-        ('\u{1d53f}', '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'), ('\u{1d547}',
-        '\u{1d549}'), ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'),
-        ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}',
-        '\u{1daa0}'), ('\u{1dab0}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'),
-        ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}',
-        '\u{1e025}'), ('\u{1e02b}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'),
-        ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94b}', '\u{1e94f}'), ('\u{1e95a}',
-        '\u{1e95d}'), ('\u{1e960}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'),
-        ('\u{1ee20}', '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}',
-        '\u{1ee26}'), ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'),
-        ('\u{1ee38}', '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}',
-        '\u{1ee41}'), ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'),
-        ('\u{1ee4a}', '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}',
-        '\u{1ee50}'), ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'),
-        ('\u{1ee58}', '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}',
-        '\u{1ee5c}'), ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'),
-        ('\u{1ee63}', '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}',
-        '\u{1ee6b}'), ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'),
-        ('\u{1ee7d}', '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}',
-        '\u{1ee8a}'), ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'),
-        ('\u{1eeaa}', '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}',
-        '\u{1efff}'), ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'),
-        ('\u{1f0af}', '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}',
-        '\u{1f0d0}'), ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'),
-        ('\u{1f12f}', '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'), ('\u{1f1ad}',
-        '\u{1f1e5}'), ('\u{1f203}', '\u{1f20f}'), ('\u{1f23c}', '\u{1f23f}'),
-        ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}',
-        '\u{1f2ff}'), ('\u{1f6d5}', '\u{1f6df}'), ('\u{1f6ed}', '\u{1f6ef}'),
-        ('\u{1f6f9}', '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'), ('\u{1f7d5}',
-        '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'),
-        ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}',
-        '\u{1f8ff}'), ('\u{1f90c}', '\u{1f90f}'), ('\u{1f93f}', '\u{1f93f}'),
-        ('\u{1f94d}', '\u{1f94f}'), ('\u{1f96c}', '\u{1f97f}'), ('\u{1f998}',
-        '\u{1f9bf}'), ('\u{1f9c1}', '\u{1f9cf}'), ('\u{1f9e7}', '\u{1ffff}'),
-        ('\u{2a6d7}', '\u{2a6ff}'), ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}',
-        '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'),
-        ('\u{2fa1e}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}')
-    ];
-
-    pub const Cc_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')
-    ];
-
-    pub const Cf_table: &'static [(char, char)] = &[
-        ('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
-        ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{8e2}', '\u{8e2}'),
-        ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200f}'), ('\u{202a}',
-        '\u{202e}'), ('\u{2060}', '\u{2064}'), ('\u{2066}', '\u{206f}'),
-        ('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'), ('\u{110bd}',
-        '\u{110bd}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'),
-        ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}')
-    ];
-
-    pub const Cn_table: &'static [(char, char)] = &[
-        ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
-        ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
-        ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
-        ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
-        ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{5ff}'), ('\u{61d}', '\u{61d}'),
-        ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'),
-        ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'), ('\u{83f}', '\u{83f}'),
-        ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{89f}'),
-        ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'), ('\u{984}', '\u{984}'),
-        ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'),
-        ('\u{9b1}', '\u{9b1}'), ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'),
-        ('\u{9c5}', '\u{9c6}'), ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'),
-        ('\u{9d8}', '\u{9db}'), ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'),
-        ('\u{9fe}', '\u{a00}'), ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'),
-        ('\u{a11}', '\u{a12}'), ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'),
-        ('\u{a34}', '\u{a34}'), ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'),
-        ('\u{a3d}', '\u{a3d}'), ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'),
-        ('\u{a4e}', '\u{a50}'), ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'),
-        ('\u{a5f}', '\u{a65}'), ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'),
-        ('\u{a8e}', '\u{a8e}'), ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'),
-        ('\u{ab1}', '\u{ab1}'), ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'),
-        ('\u{ac6}', '\u{ac6}'), ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'),
-        ('\u{ad1}', '\u{adf}'), ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'),
-        ('\u{b00}', '\u{b00}'), ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'),
-        ('\u{b11}', '\u{b12}'), ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'),
-        ('\u{b34}', '\u{b34}'), ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'),
-        ('\u{b49}', '\u{b4a}'), ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'),
-        ('\u{b5e}', '\u{b5e}'), ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'),
-        ('\u{b84}', '\u{b84}'), ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'),
-        ('\u{b96}', '\u{b98}'), ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'),
-        ('\u{ba0}', '\u{ba2}'), ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'),
-        ('\u{bba}', '\u{bbd}'), ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'),
-        ('\u{bce}', '\u{bcf}'), ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'),
-        ('\u{bfb}', '\u{bff}'), ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'),
-        ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'),
-        ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'),
-        ('\u{c57}', '\u{c57}'), ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'),
-        ('\u{c70}', '\u{c77}'), ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'),
-        ('\u{c91}', '\u{c91}'), ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'),
-        ('\u{cba}', '\u{cbb}'), ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'),
-        ('\u{cce}', '\u{cd4}'), ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'),
-        ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'),
-        ('\u{d04}', '\u{d04}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'),
-        ('\u{d45}', '\u{d45}'), ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'),
-        ('\u{d64}', '\u{d65}'), ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'),
-        ('\u{d97}', '\u{d99}'), ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'),
-        ('\u{dbe}', '\u{dbf}'), ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'),
-        ('\u{dd5}', '\u{dd5}'), ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'),
-        ('\u{df0}', '\u{df1}'), ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'),
-        ('\u{e5c}', '\u{e80}'), ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'),
-        ('\u{e89}', '\u{e89}'), ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'),
-        ('\u{e98}', '\u{e98}'), ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'),
-        ('\u{ea6}', '\u{ea6}'), ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'),
-        ('\u{eba}', '\u{eba}'), ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'),
-        ('\u{ec7}', '\u{ec7}'), ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'),
-        ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'),
-        ('\u{f98}', '\u{f98}'), ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'),
-        ('\u{fdb}', '\u{fff}'), ('\u{10c6}', '\u{10c6}'), ('\u{10c8}',
-        '\u{10cc}'), ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
-        ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'), ('\u{1259}',
-        '\u{1259}'), ('\u{125e}', '\u{125f}'), ('\u{1289}', '\u{1289}'),
-        ('\u{128e}', '\u{128f}'), ('\u{12b1}', '\u{12b1}'), ('\u{12b6}',
-        '\u{12b7}'), ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
-        ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'), ('\u{1311}',
-        '\u{1311}'), ('\u{1316}', '\u{1317}'), ('\u{135b}', '\u{135c}'),
-        ('\u{137d}', '\u{137f}'), ('\u{139a}', '\u{139f}'), ('\u{13f6}',
-        '\u{13f7}'), ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
-        ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'), ('\u{1715}',
-        '\u{171f}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'),
-        ('\u{176d}', '\u{176d}'), ('\u{1771}', '\u{1771}'), ('\u{1774}',
-        '\u{177f}'), ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
-        ('\u{17fa}', '\u{17ff}'), ('\u{180f}', '\u{180f}'), ('\u{181a}',
-        '\u{181f}'), ('\u{1878}', '\u{187f}'), ('\u{18ab}', '\u{18af}'),
-        ('\u{18f6}', '\u{18ff}'), ('\u{191f}', '\u{191f}'), ('\u{192c}',
-        '\u{192f}'), ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
-        ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'), ('\u{19ac}',
-        '\u{19af}'), ('\u{19ca}', '\u{19cf}'), ('\u{19db}', '\u{19dd}'),
-        ('\u{1a1c}', '\u{1a1d}'), ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}',
-        '\u{1a7e}'), ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
-        ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'), ('\u{1b4c}',
-        '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'),
-        ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}',
-        '\u{1cbf}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'),
-        ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}',
-        '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), ('\u{1f4e}', '\u{1f4f}'),
-        ('\u{1f58}', '\u{1f58}'), ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}',
-        '\u{1f5c}'), ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
-        ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'), ('\u{1fd4}',
-        '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'), ('\u{1ff0}', '\u{1ff1}'),
-        ('\u{1ff5}', '\u{1ff5}'), ('\u{1fff}', '\u{1fff}'), ('\u{2065}',
-        '\u{2065}'), ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
-        ('\u{209d}', '\u{209f}'), ('\u{20c0}', '\u{20cf}'), ('\u{20f1}',
-        '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{2427}', '\u{243f}'),
-        ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}',
-        '\u{2b97}'), ('\u{2bba}', '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'),
-        ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}', '\u{2bff}'), ('\u{2c2f}',
-        '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'),
-        ('\u{2d26}', '\u{2d26}'), ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}',
-        '\u{2d2f}'), ('\u{2d68}', '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'),
-        ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}', '\u{2da7}'), ('\u{2daf}',
-        '\u{2daf}'), ('\u{2db7}', '\u{2db7}'), ('\u{2dbf}', '\u{2dbf}'),
-        ('\u{2dc7}', '\u{2dc7}'), ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}',
-        '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'), ('\u{2e4a}', '\u{2e7f}'),
-        ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}',
-        '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'), ('\u{3040}', '\u{3040}'),
-        ('\u{3097}', '\u{3098}'), ('\u{3100}', '\u{3104}'), ('\u{312f}',
-        '\u{3130}'), ('\u{318f}', '\u{318f}'), ('\u{31bb}', '\u{31bf}'),
-        ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'), ('\u{32ff}',
-        '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'), ('\u{9feb}', '\u{9fff}'),
-        ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}',
-        '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), ('\u{a7af}', '\u{a7af}'),
-        ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}', '\u{a82f}'), ('\u{a83a}',
-        '\u{a83f}'), ('\u{a878}', '\u{a87f}'), ('\u{a8c6}', '\u{a8cd}'),
-        ('\u{a8da}', '\u{a8df}'), ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}',
-        '\u{a95e}'), ('\u{a97d}', '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'),
-        ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}', '\u{a9ff}'), ('\u{aa37}',
-        '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'), ('\u{aa5a}', '\u{aa5b}'),
-        ('\u{aac3}', '\u{aada}'), ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}',
-        '\u{ab08}'), ('\u{ab0f}', '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'),
-        ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}', '\u{ab2f}'), ('\u{ab66}',
-        '\u{ab6f}'), ('\u{abee}', '\u{abef}'), ('\u{abfa}', '\u{abff}'),
-        ('\u{d7a4}', '\u{d7af}'), ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}',
-        '\u{d7ff}'), ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
-        ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'), ('\u{fb37}',
-        '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'), ('\u{fb3f}', '\u{fb3f}'),
-        ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}',
-        '\u{fbd2}'), ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
-        ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'), ('\u{fe1a}',
-        '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'),
-        ('\u{fe6c}', '\u{fe6f}'), ('\u{fe75}', '\u{fe75}'), ('\u{fefd}',
-        '\u{fefe}'), ('\u{ff00}', '\u{ff00}'), ('\u{ffbf}', '\u{ffc1}'),
-        ('\u{ffc8}', '\u{ffc9}'), ('\u{ffd0}', '\u{ffd1}'), ('\u{ffd8}',
-        '\u{ffd9}'), ('\u{ffdd}', '\u{ffdf}'), ('\u{ffe7}', '\u{ffe7}'),
-        ('\u{ffef}', '\u{fff8}'), ('\u{fffe}', '\u{ffff}'), ('\u{1000c}',
-        '\u{1000c}'), ('\u{10027}', '\u{10027}'), ('\u{1003b}', '\u{1003b}'),
-        ('\u{1003e}', '\u{1003e}'), ('\u{1004e}', '\u{1004f}'), ('\u{1005e}',
-        '\u{1007f}'), ('\u{100fb}', '\u{100ff}'), ('\u{10103}', '\u{10106}'),
-        ('\u{10134}', '\u{10136}'), ('\u{1018f}', '\u{1018f}'), ('\u{1019c}',
-        '\u{1019f}'), ('\u{101a1}', '\u{101cf}'), ('\u{101fe}', '\u{1027f}'),
-        ('\u{1029d}', '\u{1029f}'), ('\u{102d1}', '\u{102df}'), ('\u{102fc}',
-        '\u{102ff}'), ('\u{10324}', '\u{1032c}'), ('\u{1034b}', '\u{1034f}'),
-        ('\u{1037b}', '\u{1037f}'), ('\u{1039e}', '\u{1039e}'), ('\u{103c4}',
-        '\u{103c7}'), ('\u{103d6}', '\u{103ff}'), ('\u{1049e}', '\u{1049f}'),
-        ('\u{104aa}', '\u{104af}'), ('\u{104d4}', '\u{104d7}'), ('\u{104fc}',
-        '\u{104ff}'), ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'),
-        ('\u{10570}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}',
-        '\u{1075f}'), ('\u{10768}', '\u{107ff}'), ('\u{10806}', '\u{10807}'),
-        ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'), ('\u{10839}',
-        '\u{1083b}'), ('\u{1083d}', '\u{1083e}'), ('\u{10856}', '\u{10856}'),
-        ('\u{1089f}', '\u{108a6}'), ('\u{108b0}', '\u{108df}'), ('\u{108f3}',
-        '\u{108f3}'), ('\u{108f6}', '\u{108fa}'), ('\u{1091c}', '\u{1091e}'),
-        ('\u{1093a}', '\u{1093e}'), ('\u{10940}', '\u{1097f}'), ('\u{109b8}',
-        '\u{109bb}'), ('\u{109d0}', '\u{109d1}'), ('\u{10a04}', '\u{10a04}'),
-        ('\u{10a07}', '\u{10a0b}'), ('\u{10a14}', '\u{10a14}'), ('\u{10a18}',
-        '\u{10a18}'), ('\u{10a34}', '\u{10a37}'), ('\u{10a3b}', '\u{10a3e}'),
-        ('\u{10a48}', '\u{10a4f}'), ('\u{10a59}', '\u{10a5f}'), ('\u{10aa0}',
-        '\u{10abf}'), ('\u{10ae7}', '\u{10aea}'), ('\u{10af7}', '\u{10aff}'),
-        ('\u{10b36}', '\u{10b38}'), ('\u{10b56}', '\u{10b57}'), ('\u{10b73}',
-        '\u{10b77}'), ('\u{10b92}', '\u{10b98}'), ('\u{10b9d}', '\u{10ba8}'),
-        ('\u{10bb0}', '\u{10bff}'), ('\u{10c49}', '\u{10c7f}'), ('\u{10cb3}',
-        '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'), ('\u{10d00}', '\u{10e5f}'),
-        ('\u{10e7f}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'), ('\u{11070}',
-        '\u{1107e}'), ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'),
-        ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'), ('\u{11144}',
-        '\u{1114f}'), ('\u{11177}', '\u{1117f}'), ('\u{111ce}', '\u{111cf}'),
-        ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}',
-        '\u{11212}'), ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'),
-        ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'), ('\u{1129e}',
-        '\u{1129e}'), ('\u{112aa}', '\u{112af}'), ('\u{112eb}', '\u{112ef}'),
-        ('\u{112fa}', '\u{112ff}'), ('\u{11304}', '\u{11304}'), ('\u{1130d}',
-        '\u{1130e}'), ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'),
-        ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'), ('\u{1133a}',
-        '\u{1133b}'), ('\u{11345}', '\u{11346}'), ('\u{11349}', '\u{1134a}'),
-        ('\u{1134e}', '\u{1134f}'), ('\u{11351}', '\u{11356}'), ('\u{11358}',
-        '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'),
-        ('\u{11375}', '\u{113ff}'), ('\u{1145a}', '\u{1145a}'), ('\u{1145c}',
-        '\u{1145c}'), ('\u{1145e}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'),
-        ('\u{114da}', '\u{1157f}'), ('\u{115b6}', '\u{115b7}'), ('\u{115de}',
-        '\u{115ff}'), ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'),
-        ('\u{1166d}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'), ('\u{116ca}',
-        '\u{116ff}'), ('\u{1171a}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'),
-        ('\u{11740}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11900}',
-        '\u{119ff}'), ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'),
-        ('\u{11a9d}', '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'), ('\u{11af9}',
-        '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'),
-        ('\u{11c46}', '\u{11c4f}'), ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}',
-        '\u{11c91}'), ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'),
-        ('\u{11d07}', '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'), ('\u{11d37}',
-        '\u{11d39}'), ('\u{11d3b}', '\u{11d3b}'), ('\u{11d3e}', '\u{11d3e}'),
-        ('\u{11d48}', '\u{11d4f}'), ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}',
-        '\u{123ff}'), ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'),
-        ('\u{12544}', '\u{12fff}'), ('\u{1342f}', '\u{143ff}'), ('\u{14647}',
-        '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'),
-        ('\u{16a6a}', '\u{16a6d}'), ('\u{16a70}', '\u{16acf}'), ('\u{16aee}',
-        '\u{16aef}'), ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'),
-        ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}',
-        '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'), ('\u{16f45}', '\u{16f4f}'),
-        ('\u{16f7f}', '\u{16f8e}'), ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}',
-        '\u{16fff}'), ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'),
-        ('\u{1b11f}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}',
-        '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'),
-        ('\u{1bc9a}', '\u{1bc9b}'), ('\u{1bca4}', '\u{1cfff}'), ('\u{1d0f6}',
-        '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), ('\u{1d1e9}', '\u{1d1ff}'),
-        ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'), ('\u{1d372}',
-        '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}', '\u{1d49d}'),
-        ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'), ('\u{1d4a7}',
-        '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}', '\u{1d4ba}'),
-        ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'), ('\u{1d506}',
-        '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}', '\u{1d515}'),
-        ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'), ('\u{1d53f}',
-        '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'), ('\u{1d547}', '\u{1d549}'),
-        ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'), ('\u{1d7cc}',
-        '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'),
-        ('\u{1dab0}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'), ('\u{1e019}',
-        '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'),
-        ('\u{1e02b}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}',
-        '\u{1e8ff}'), ('\u{1e94b}', '\u{1e94f}'), ('\u{1e95a}', '\u{1e95d}'),
-        ('\u{1e960}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'), ('\u{1ee20}',
-        '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}', '\u{1ee26}'),
-        ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'), ('\u{1ee38}',
-        '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}', '\u{1ee41}'),
-        ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'), ('\u{1ee4a}',
-        '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}', '\u{1ee50}'),
-        ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'), ('\u{1ee58}',
-        '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}', '\u{1ee5c}'),
-        ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'), ('\u{1ee63}',
-        '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}', '\u{1ee6b}'),
-        ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'), ('\u{1ee7d}',
-        '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}', '\u{1ee8a}'),
-        ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'), ('\u{1eeaa}',
-        '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}', '\u{1efff}'),
-        ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'), ('\u{1f0af}',
-        '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}', '\u{1f0d0}'),
-        ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'), ('\u{1f12f}',
-        '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'), ('\u{1f1ad}', '\u{1f1e5}'),
-        ('\u{1f203}', '\u{1f20f}'), ('\u{1f23c}', '\u{1f23f}'), ('\u{1f249}',
-        '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'),
-        ('\u{1f6d5}', '\u{1f6df}'), ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6f9}',
-        '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'), ('\u{1f7d5}', '\u{1f7ff}'),
-        ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}',
-        '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8ff}'),
-        ('\u{1f90c}', '\u{1f90f}'), ('\u{1f93f}', '\u{1f93f}'), ('\u{1f94d}',
-        '\u{1f94f}'), ('\u{1f96c}', '\u{1f97f}'), ('\u{1f998}', '\u{1f9bf}'),
-        ('\u{1f9c1}', '\u{1f9cf}'), ('\u{1f9e7}', '\u{1ffff}'), ('\u{2a6d7}',
-        '\u{2a6ff}'), ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'),
-        ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}',
-        '\u{e0000}'), ('\u{e0002}', '\u{e001f}'), ('\u{e0080}', '\u{e00ff}'),
-        ('\u{e01f0}', '\u{effff}'), ('\u{ffffe}', '\u{fffff}'), ('\u{10fffe}',
-        '\u{10ffff}')
-    ];
-
-    pub const Co_table: &'static [(char, char)] = &[
-        ('\u{e000}', '\u{f8ff}'), ('\u{f0000}', '\u{ffffd}'), ('\u{100000}',
-        '\u{10fffd}')
-    ];
-
-    pub const L_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37a}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{64a}'), ('\u{66e}', '\u{66f}'),
-        ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'), ('\u{6e5}', '\u{6e6}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{815}'), ('\u{81a}', '\u{81a}'),
-        ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'), ('\u{840}', '\u{858}'),
-        ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'), ('\u{8b6}', '\u{8bd}'),
-        ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'), ('\u{950}', '\u{950}'),
-        ('\u{958}', '\u{961}'), ('\u{971}', '\u{980}'), ('\u{985}', '\u{98c}'),
-        ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'),
-        ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9bd}'),
-        ('\u{9ce}', '\u{9ce}'), ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e1}'),
-        ('\u{9f0}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a72}', '\u{a74}'),
-        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'),
-        ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
-        ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae1}'),
-        ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'), ('\u{b83}', '\u{b83}'),
-        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'),
-        ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'),
-        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'),
-        ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c3d}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'), ('\u{c80}', '\u{c80}'),
-        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'),
-        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'),
-        ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'),
-        ('\u{d3d}', '\u{d3d}'), ('\u{d4e}', '\u{d4e}'), ('\u{d54}', '\u{d56}'),
-        ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'), ('\u{d85}', '\u{d96}'),
-        ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'),
-        ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'), ('\u{e32}', '\u{e33}'),
-        ('\u{e40}', '\u{e46}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
-        ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'),
-        ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'),
-        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'),
-        ('\u{ead}', '\u{eb0}'), ('\u{eb2}', '\u{eb3}'), ('\u{ebd}', '\u{ebd}'),
-        ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'), ('\u{edc}', '\u{edf}'),
-        ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f88}', '\u{f8c}'), ('\u{1000}', '\u{102a}'), ('\u{103f}',
-        '\u{103f}'), ('\u{1050}', '\u{1055}'), ('\u{105a}', '\u{105d}'),
-        ('\u{1061}', '\u{1061}'), ('\u{1065}', '\u{1066}'), ('\u{106e}',
-        '\u{1070}'), ('\u{1075}', '\u{1081}'), ('\u{108e}', '\u{108e}'),
-        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}',
-        '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{1248}'),
-        ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'), ('\u{1258}',
-        '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'),
-        ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}',
-        '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}', '\u{12c0}'),
-        ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}',
-        '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'),
-        ('\u{1380}', '\u{138f}'), ('\u{13a0}', '\u{13f5}'), ('\u{13f8}',
-        '\u{13fd}'), ('\u{1401}', '\u{166c}'), ('\u{166f}', '\u{167f}'),
-        ('\u{1681}', '\u{169a}'), ('\u{16a0}', '\u{16ea}'), ('\u{16f1}',
-        '\u{16f8}'), ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1711}'),
-        ('\u{1720}', '\u{1731}'), ('\u{1740}', '\u{1751}'), ('\u{1760}',
-        '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1780}', '\u{17b3}'),
-        ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dc}'), ('\u{1820}',
-        '\u{1877}'), ('\u{1880}', '\u{1884}'), ('\u{1887}', '\u{18a8}'),
-        ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}',
-        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}',
-        '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1aa7}', '\u{1aa7}'),
-        ('\u{1b05}', '\u{1b33}'), ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}',
-        '\u{1ba0}'), ('\u{1bae}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'),
-        ('\u{1c00}', '\u{1c23}'), ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}',
-        '\u{1c7d}'), ('\u{1c80}', '\u{1c88}'), ('\u{1ce9}', '\u{1cec}'),
-        ('\u{1cee}', '\u{1cf1}'), ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}',
-        '\u{1dbf}'), ('\u{1e00}', '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'),
-        ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}',
-        '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'),
-        ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}',
-        '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'),
-        ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}',
-        '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'),
-        ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'), ('\u{2071}',
-        '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}',
-        '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2119}', '\u{211d}'),
-        ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}',
-        '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}', '\u{2139}'),
-        ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2183}', '\u{2184}'), ('\u{2c00}', '\u{2c2e}'),
-        ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}',
-        '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'),
-        ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}',
-        '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}', '\u{2d96}'),
-        ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), ('\u{2db0}',
-        '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'),
-        ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}',
-        '\u{2dde}'), ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}', '\u{3006}'),
-        ('\u{3031}', '\u{3035}'), ('\u{303b}', '\u{303c}'), ('\u{3041}',
-        '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}', '\u{30fa}'),
-        ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312e}'), ('\u{3131}',
-        '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}', '\u{31ff}'),
-        ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fea}'), ('\u{a000}',
-        '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}', '\u{a60c}'),
-        ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'), ('\u{a640}',
-        '\u{a66e}'), ('\u{a67f}', '\u{a69d}'), ('\u{a6a0}', '\u{a6e5}'),
-        ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}',
-        '\u{a7ae}'), ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a801}'),
-        ('\u{a803}', '\u{a805}'), ('\u{a807}', '\u{a80a}'), ('\u{a80c}',
-        '\u{a822}'), ('\u{a840}', '\u{a873}'), ('\u{a882}', '\u{a8b3}'),
-        ('\u{a8f2}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}',
-        '\u{a8fd}'), ('\u{a90a}', '\u{a925}'), ('\u{a930}', '\u{a946}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{a984}', '\u{a9b2}'), ('\u{a9cf}',
-        '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}', '\u{a9ef}'),
-        ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'), ('\u{aa40}',
-        '\u{aa42}'), ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}', '\u{aa76}'),
-        ('\u{aa7a}', '\u{aa7a}'), ('\u{aa7e}', '\u{aaaf}'), ('\u{aab1}',
-        '\u{aab1}'), ('\u{aab5}', '\u{aab6}'), ('\u{aab9}', '\u{aabd}'),
-        ('\u{aac0}', '\u{aac0}'), ('\u{aac2}', '\u{aac2}'), ('\u{aadb}',
-        '\u{aadd}'), ('\u{aae0}', '\u{aaea}'), ('\u{aaf2}', '\u{aaf4}'),
-        ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}',
-        '\u{ab16}'), ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'),
-        ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}',
-        '\u{abe2}'), ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'),
-        ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'), ('\u{fa70}',
-        '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'),
-        ('\u{fb1d}', '\u{fb1d}'), ('\u{fb1f}', '\u{fb28}'), ('\u{fb2a}',
-        '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'),
-        ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}',
-        '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'),
-        ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
-        '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'),
-        ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'),
-        ('\u{10300}', '\u{1031f}'), ('\u{1032d}', '\u{10340}'), ('\u{10342}',
-        '\u{10349}'), ('\u{10350}', '\u{10375}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{10400}',
-        '\u{1049d}'), ('\u{104b0}', '\u{104d3}'), ('\u{104d8}', '\u{104fb}'),
-        ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'), ('\u{10600}',
-        '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}', '\u{10767}'),
-        ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}',
-        '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'),
-        ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'), ('\u{10880}',
-        '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'),
-        ('\u{10900}', '\u{10915}'), ('\u{10920}', '\u{10939}'), ('\u{10980}',
-        '\u{109b7}'), ('\u{109be}', '\u{109bf}'), ('\u{10a00}', '\u{10a00}'),
-        ('\u{10a10}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}',
-        '\u{10a33}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'),
-        ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}',
-        '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'),
-        ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}',
-        '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{11003}', '\u{11037}'),
-        ('\u{11083}', '\u{110af}'), ('\u{110d0}', '\u{110e8}'), ('\u{11103}',
-        '\u{11126}'), ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'),
-        ('\u{11183}', '\u{111b2}'), ('\u{111c1}', '\u{111c4}'), ('\u{111da}',
-        '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'),
-        ('\u{11213}', '\u{1122b}'), ('\u{11280}', '\u{11286}'), ('\u{11288}',
-        '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', '\u{1129d}'),
-        ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112de}'), ('\u{11305}',
-        '\u{1130c}'), ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'),
-        ('\u{1132a}', '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}',
-        '\u{11339}'), ('\u{1133d}', '\u{1133d}'), ('\u{11350}', '\u{11350}'),
-        ('\u{1135d}', '\u{11361}'), ('\u{11400}', '\u{11434}'), ('\u{11447}',
-        '\u{1144a}'), ('\u{11480}', '\u{114af}'), ('\u{114c4}', '\u{114c5}'),
-        ('\u{114c7}', '\u{114c7}'), ('\u{11580}', '\u{115ae}'), ('\u{115d8}',
-        '\u{115db}'), ('\u{11600}', '\u{1162f}'), ('\u{11644}', '\u{11644}'),
-        ('\u{11680}', '\u{116aa}'), ('\u{11700}', '\u{11719}'), ('\u{118a0}',
-        '\u{118df}'), ('\u{118ff}', '\u{118ff}'), ('\u{11a00}', '\u{11a00}'),
-        ('\u{11a0b}', '\u{11a32}'), ('\u{11a3a}', '\u{11a3a}'), ('\u{11a50}',
-        '\u{11a50}'), ('\u{11a5c}', '\u{11a83}'), ('\u{11a86}', '\u{11a89}'),
-        ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', '\u{11c08}'), ('\u{11c0a}',
-        '\u{11c2e}'), ('\u{11c40}', '\u{11c40}'), ('\u{11c72}', '\u{11c8f}'),
-        ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}',
-        '\u{11d30}'), ('\u{11d46}', '\u{11d46}'), ('\u{12000}', '\u{12399}'),
-        ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'), ('\u{14400}',
-        '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'),
-        ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}', '\u{16b2f}'), ('\u{16b40}',
-        '\u{16b43}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'),
-        ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f50}'), ('\u{16f93}',
-        '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'), ('\u{17000}', '\u{187ec}'),
-        ('\u{18800}', '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), ('\u{1b170}',
-        '\u{1b2fb}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'),
-        ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1d400}',
-        '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'),
-        ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}',
-        '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'),
-        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}',
-        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'),
-        ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}',
-        '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'),
-        ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}',
-        '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'),
-        ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}',
-        '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'),
-        ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e800}',
-        '\u{1e8c4}'), ('\u{1e900}', '\u{1e943}'), ('\u{1ee00}', '\u{1ee03}'),
-        ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}',
-        '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'),
-        ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}',
-        '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'),
-        ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}',
-        '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'),
-        ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}',
-        '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'),
-        ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}',
-        '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'),
-        ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}',
-        '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'),
-        ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}',
-        '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'),
-        ('\u{2b820}', '\u{2cea1}'), ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}',
-        '\u{2fa1d}')
-    ];
-
-    pub const LC_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{b5}', '\u{b5}'),
-        ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{1ba}'),
-        ('\u{1bc}', '\u{1bf}'), ('\u{1c4}', '\u{293}'), ('\u{295}', '\u{2af}'),
-        ('\u{370}', '\u{373}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{561}', '\u{587}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}',
-        '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{13a0}', '\u{13f5}'),
-        ('\u{13f8}', '\u{13fd}'), ('\u{1c80}', '\u{1c88}'), ('\u{1d00}',
-        '\u{1d2b}'), ('\u{1d6b}', '\u{1d77}'), ('\u{1d79}', '\u{1d9a}'),
-        ('\u{1e00}', '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}',
-        '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'),
-        ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}',
-        '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'),
-        ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}',
-        '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'),
-        ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}',
-        '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'), ('\u{2102}', '\u{2102}'),
-        ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}',
-        '\u{2115}'), ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'),
-        ('\u{2126}', '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}',
-        '\u{212d}'), ('\u{212f}', '\u{2134}'), ('\u{2139}', '\u{2139}'),
-        ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2183}', '\u{2184}'), ('\u{2c00}', '\u{2c2e}'),
-        ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}', '\u{2c7b}'), ('\u{2c7e}',
-        '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{a640}', '\u{a66d}'), ('\u{a680}', '\u{a69b}'),
-        ('\u{a722}', '\u{a76f}'), ('\u{a771}', '\u{a787}'), ('\u{a78b}',
-        '\u{a78e}'), ('\u{a790}', '\u{a7ae}'), ('\u{a7b0}', '\u{a7b7}'),
-        ('\u{a7fa}', '\u{a7fa}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab60}',
-        '\u{ab65}'), ('\u{ab70}', '\u{abbf}'), ('\u{fb00}', '\u{fb06}'),
-        ('\u{fb13}', '\u{fb17}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}',
-        '\u{ff5a}'), ('\u{10400}', '\u{1044f}'), ('\u{104b0}', '\u{104d3}'),
-        ('\u{104d8}', '\u{104fb}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}',
-        '\u{10cf2}'), ('\u{118a0}', '\u{118df}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'),
-        ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}',
-        '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'),
-        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}',
-        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e900}', '\u{1e943}')
-    ];
-
-    pub const Ll_table: &'static [(char, char)] = &[
-        ('\u{61}', '\u{7a}'), ('\u{b5}', '\u{b5}'), ('\u{df}', '\u{f6}'),
-        ('\u{f8}', '\u{ff}'), ('\u{101}', '\u{101}'), ('\u{103}', '\u{103}'),
-        ('\u{105}', '\u{105}'), ('\u{107}', '\u{107}'), ('\u{109}', '\u{109}'),
-        ('\u{10b}', '\u{10b}'), ('\u{10d}', '\u{10d}'), ('\u{10f}', '\u{10f}'),
-        ('\u{111}', '\u{111}'), ('\u{113}', '\u{113}'), ('\u{115}', '\u{115}'),
-        ('\u{117}', '\u{117}'), ('\u{119}', '\u{119}'), ('\u{11b}', '\u{11b}'),
-        ('\u{11d}', '\u{11d}'), ('\u{11f}', '\u{11f}'), ('\u{121}', '\u{121}'),
-        ('\u{123}', '\u{123}'), ('\u{125}', '\u{125}'), ('\u{127}', '\u{127}'),
-        ('\u{129}', '\u{129}'), ('\u{12b}', '\u{12b}'), ('\u{12d}', '\u{12d}'),
-        ('\u{12f}', '\u{12f}'), ('\u{131}', '\u{131}'), ('\u{133}', '\u{133}'),
-        ('\u{135}', '\u{135}'), ('\u{137}', '\u{138}'), ('\u{13a}', '\u{13a}'),
-        ('\u{13c}', '\u{13c}'), ('\u{13e}', '\u{13e}'), ('\u{140}', '\u{140}'),
-        ('\u{142}', '\u{142}'), ('\u{144}', '\u{144}'), ('\u{146}', '\u{146}'),
-        ('\u{148}', '\u{149}'), ('\u{14b}', '\u{14b}'), ('\u{14d}', '\u{14d}'),
-        ('\u{14f}', '\u{14f}'), ('\u{151}', '\u{151}'), ('\u{153}', '\u{153}'),
-        ('\u{155}', '\u{155}'), ('\u{157}', '\u{157}'), ('\u{159}', '\u{159}'),
-        ('\u{15b}', '\u{15b}'), ('\u{15d}', '\u{15d}'), ('\u{15f}', '\u{15f}'),
-        ('\u{161}', '\u{161}'), ('\u{163}', '\u{163}'), ('\u{165}', '\u{165}'),
-        ('\u{167}', '\u{167}'), ('\u{169}', '\u{169}'), ('\u{16b}', '\u{16b}'),
-        ('\u{16d}', '\u{16d}'), ('\u{16f}', '\u{16f}'), ('\u{171}', '\u{171}'),
-        ('\u{173}', '\u{173}'), ('\u{175}', '\u{175}'), ('\u{177}', '\u{177}'),
-        ('\u{17a}', '\u{17a}'), ('\u{17c}', '\u{17c}'), ('\u{17e}', '\u{180}'),
-        ('\u{183}', '\u{183}'), ('\u{185}', '\u{185}'), ('\u{188}', '\u{188}'),
-        ('\u{18c}', '\u{18d}'), ('\u{192}', '\u{192}'), ('\u{195}', '\u{195}'),
-        ('\u{199}', '\u{19b}'), ('\u{19e}', '\u{19e}'), ('\u{1a1}', '\u{1a1}'),
-        ('\u{1a3}', '\u{1a3}'), ('\u{1a5}', '\u{1a5}'), ('\u{1a8}', '\u{1a8}'),
-        ('\u{1aa}', '\u{1ab}'), ('\u{1ad}', '\u{1ad}'), ('\u{1b0}', '\u{1b0}'),
-        ('\u{1b4}', '\u{1b4}'), ('\u{1b6}', '\u{1b6}'), ('\u{1b9}', '\u{1ba}'),
-        ('\u{1bd}', '\u{1bf}'), ('\u{1c6}', '\u{1c6}'), ('\u{1c9}', '\u{1c9}'),
-        ('\u{1cc}', '\u{1cc}'), ('\u{1ce}', '\u{1ce}'), ('\u{1d0}', '\u{1d0}'),
-        ('\u{1d2}', '\u{1d2}'), ('\u{1d4}', '\u{1d4}'), ('\u{1d6}', '\u{1d6}'),
-        ('\u{1d8}', '\u{1d8}'), ('\u{1da}', '\u{1da}'), ('\u{1dc}', '\u{1dd}'),
-        ('\u{1df}', '\u{1df}'), ('\u{1e1}', '\u{1e1}'), ('\u{1e3}', '\u{1e3}'),
-        ('\u{1e5}', '\u{1e5}'), ('\u{1e7}', '\u{1e7}'), ('\u{1e9}', '\u{1e9}'),
-        ('\u{1eb}', '\u{1eb}'), ('\u{1ed}', '\u{1ed}'), ('\u{1ef}', '\u{1f0}'),
-        ('\u{1f3}', '\u{1f3}'), ('\u{1f5}', '\u{1f5}'), ('\u{1f9}', '\u{1f9}'),
-        ('\u{1fb}', '\u{1fb}'), ('\u{1fd}', '\u{1fd}'), ('\u{1ff}', '\u{1ff}'),
-        ('\u{201}', '\u{201}'), ('\u{203}', '\u{203}'), ('\u{205}', '\u{205}'),
-        ('\u{207}', '\u{207}'), ('\u{209}', '\u{209}'), ('\u{20b}', '\u{20b}'),
-        ('\u{20d}', '\u{20d}'), ('\u{20f}', '\u{20f}'), ('\u{211}', '\u{211}'),
-        ('\u{213}', '\u{213}'), ('\u{215}', '\u{215}'), ('\u{217}', '\u{217}'),
-        ('\u{219}', '\u{219}'), ('\u{21b}', '\u{21b}'), ('\u{21d}', '\u{21d}'),
-        ('\u{21f}', '\u{21f}'), ('\u{221}', '\u{221}'), ('\u{223}', '\u{223}'),
-        ('\u{225}', '\u{225}'), ('\u{227}', '\u{227}'), ('\u{229}', '\u{229}'),
-        ('\u{22b}', '\u{22b}'), ('\u{22d}', '\u{22d}'), ('\u{22f}', '\u{22f}'),
-        ('\u{231}', '\u{231}'), ('\u{233}', '\u{239}'), ('\u{23c}', '\u{23c}'),
-        ('\u{23f}', '\u{240}'), ('\u{242}', '\u{242}'), ('\u{247}', '\u{247}'),
-        ('\u{249}', '\u{249}'), ('\u{24b}', '\u{24b}'), ('\u{24d}', '\u{24d}'),
-        ('\u{24f}', '\u{293}'), ('\u{295}', '\u{2af}'), ('\u{371}', '\u{371}'),
-        ('\u{373}', '\u{373}'), ('\u{377}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{390}', '\u{390}'), ('\u{3ac}', '\u{3ce}'), ('\u{3d0}', '\u{3d1}'),
-        ('\u{3d5}', '\u{3d7}'), ('\u{3d9}', '\u{3d9}'), ('\u{3db}', '\u{3db}'),
-        ('\u{3dd}', '\u{3dd}'), ('\u{3df}', '\u{3df}'), ('\u{3e1}', '\u{3e1}'),
-        ('\u{3e3}', '\u{3e3}'), ('\u{3e5}', '\u{3e5}'), ('\u{3e7}', '\u{3e7}'),
-        ('\u{3e9}', '\u{3e9}'), ('\u{3eb}', '\u{3eb}'), ('\u{3ed}', '\u{3ed}'),
-        ('\u{3ef}', '\u{3f3}'), ('\u{3f5}', '\u{3f5}'), ('\u{3f8}', '\u{3f8}'),
-        ('\u{3fb}', '\u{3fc}'), ('\u{430}', '\u{45f}'), ('\u{461}', '\u{461}'),
-        ('\u{463}', '\u{463}'), ('\u{465}', '\u{465}'), ('\u{467}', '\u{467}'),
-        ('\u{469}', '\u{469}'), ('\u{46b}', '\u{46b}'), ('\u{46d}', '\u{46d}'),
-        ('\u{46f}', '\u{46f}'), ('\u{471}', '\u{471}'), ('\u{473}', '\u{473}'),
-        ('\u{475}', '\u{475}'), ('\u{477}', '\u{477}'), ('\u{479}', '\u{479}'),
-        ('\u{47b}', '\u{47b}'), ('\u{47d}', '\u{47d}'), ('\u{47f}', '\u{47f}'),
-        ('\u{481}', '\u{481}'), ('\u{48b}', '\u{48b}'), ('\u{48d}', '\u{48d}'),
-        ('\u{48f}', '\u{48f}'), ('\u{491}', '\u{491}'), ('\u{493}', '\u{493}'),
-        ('\u{495}', '\u{495}'), ('\u{497}', '\u{497}'), ('\u{499}', '\u{499}'),
-        ('\u{49b}', '\u{49b}'), ('\u{49d}', '\u{49d}'), ('\u{49f}', '\u{49f}'),
-        ('\u{4a1}', '\u{4a1}'), ('\u{4a3}', '\u{4a3}'), ('\u{4a5}', '\u{4a5}'),
-        ('\u{4a7}', '\u{4a7}'), ('\u{4a9}', '\u{4a9}'), ('\u{4ab}', '\u{4ab}'),
-        ('\u{4ad}', '\u{4ad}'), ('\u{4af}', '\u{4af}'), ('\u{4b1}', '\u{4b1}'),
-        ('\u{4b3}', '\u{4b3}'), ('\u{4b5}', '\u{4b5}'), ('\u{4b7}', '\u{4b7}'),
-        ('\u{4b9}', '\u{4b9}'), ('\u{4bb}', '\u{4bb}'), ('\u{4bd}', '\u{4bd}'),
-        ('\u{4bf}', '\u{4bf}'), ('\u{4c2}', '\u{4c2}'), ('\u{4c4}', '\u{4c4}'),
-        ('\u{4c6}', '\u{4c6}'), ('\u{4c8}', '\u{4c8}'), ('\u{4ca}', '\u{4ca}'),
-        ('\u{4cc}', '\u{4cc}'), ('\u{4ce}', '\u{4cf}'), ('\u{4d1}', '\u{4d1}'),
-        ('\u{4d3}', '\u{4d3}'), ('\u{4d5}', '\u{4d5}'), ('\u{4d7}', '\u{4d7}'),
-        ('\u{4d9}', '\u{4d9}'), ('\u{4db}', '\u{4db}'), ('\u{4dd}', '\u{4dd}'),
-        ('\u{4df}', '\u{4df}'), ('\u{4e1}', '\u{4e1}'), ('\u{4e3}', '\u{4e3}'),
-        ('\u{4e5}', '\u{4e5}'), ('\u{4e7}', '\u{4e7}'), ('\u{4e9}', '\u{4e9}'),
-        ('\u{4eb}', '\u{4eb}'), ('\u{4ed}', '\u{4ed}'), ('\u{4ef}', '\u{4ef}'),
-        ('\u{4f1}', '\u{4f1}'), ('\u{4f3}', '\u{4f3}'), ('\u{4f5}', '\u{4f5}'),
-        ('\u{4f7}', '\u{4f7}'), ('\u{4f9}', '\u{4f9}'), ('\u{4fb}', '\u{4fb}'),
-        ('\u{4fd}', '\u{4fd}'), ('\u{4ff}', '\u{4ff}'), ('\u{501}', '\u{501}'),
-        ('\u{503}', '\u{503}'), ('\u{505}', '\u{505}'), ('\u{507}', '\u{507}'),
-        ('\u{509}', '\u{509}'), ('\u{50b}', '\u{50b}'), ('\u{50d}', '\u{50d}'),
-        ('\u{50f}', '\u{50f}'), ('\u{511}', '\u{511}'), ('\u{513}', '\u{513}'),
-        ('\u{515}', '\u{515}'), ('\u{517}', '\u{517}'), ('\u{519}', '\u{519}'),
-        ('\u{51b}', '\u{51b}'), ('\u{51d}', '\u{51d}'), ('\u{51f}', '\u{51f}'),
-        ('\u{521}', '\u{521}'), ('\u{523}', '\u{523}'), ('\u{525}', '\u{525}'),
-        ('\u{527}', '\u{527}'), ('\u{529}', '\u{529}'), ('\u{52b}', '\u{52b}'),
-        ('\u{52d}', '\u{52d}'), ('\u{52f}', '\u{52f}'), ('\u{561}', '\u{587}'),
-        ('\u{13f8}', '\u{13fd}'), ('\u{1c80}', '\u{1c88}'), ('\u{1d00}',
-        '\u{1d2b}'), ('\u{1d6b}', '\u{1d77}'), ('\u{1d79}', '\u{1d9a}'),
-        ('\u{1e01}', '\u{1e01}'), ('\u{1e03}', '\u{1e03}'), ('\u{1e05}',
-        '\u{1e05}'), ('\u{1e07}', '\u{1e07}'), ('\u{1e09}', '\u{1e09}'),
-        ('\u{1e0b}', '\u{1e0b}'), ('\u{1e0d}', '\u{1e0d}'), ('\u{1e0f}',
-        '\u{1e0f}'), ('\u{1e11}', '\u{1e11}'), ('\u{1e13}', '\u{1e13}'),
-        ('\u{1e15}', '\u{1e15}'), ('\u{1e17}', '\u{1e17}'), ('\u{1e19}',
-        '\u{1e19}'), ('\u{1e1b}', '\u{1e1b}'), ('\u{1e1d}', '\u{1e1d}'),
-        ('\u{1e1f}', '\u{1e1f}'), ('\u{1e21}', '\u{1e21}'), ('\u{1e23}',
-        '\u{1e23}'), ('\u{1e25}', '\u{1e25}'), ('\u{1e27}', '\u{1e27}'),
-        ('\u{1e29}', '\u{1e29}'), ('\u{1e2b}', '\u{1e2b}'), ('\u{1e2d}',
-        '\u{1e2d}'), ('\u{1e2f}', '\u{1e2f}'), ('\u{1e31}', '\u{1e31}'),
-        ('\u{1e33}', '\u{1e33}'), ('\u{1e35}', '\u{1e35}'), ('\u{1e37}',
-        '\u{1e37}'), ('\u{1e39}', '\u{1e39}'), ('\u{1e3b}', '\u{1e3b}'),
-        ('\u{1e3d}', '\u{1e3d}'), ('\u{1e3f}', '\u{1e3f}'), ('\u{1e41}',
-        '\u{1e41}'), ('\u{1e43}', '\u{1e43}'), ('\u{1e45}', '\u{1e45}'),
-        ('\u{1e47}', '\u{1e47}'), ('\u{1e49}', '\u{1e49}'), ('\u{1e4b}',
-        '\u{1e4b}'), ('\u{1e4d}', '\u{1e4d}'), ('\u{1e4f}', '\u{1e4f}'),
-        ('\u{1e51}', '\u{1e51}'), ('\u{1e53}', '\u{1e53}'), ('\u{1e55}',
-        '\u{1e55}'), ('\u{1e57}', '\u{1e57}'), ('\u{1e59}', '\u{1e59}'),
-        ('\u{1e5b}', '\u{1e5b}'), ('\u{1e5d}', '\u{1e5d}'), ('\u{1e5f}',
-        '\u{1e5f}'), ('\u{1e61}', '\u{1e61}'), ('\u{1e63}', '\u{1e63}'),
-        ('\u{1e65}', '\u{1e65}'), ('\u{1e67}', '\u{1e67}'), ('\u{1e69}',
-        '\u{1e69}'), ('\u{1e6b}', '\u{1e6b}'), ('\u{1e6d}', '\u{1e6d}'),
-        ('\u{1e6f}', '\u{1e6f}'), ('\u{1e71}', '\u{1e71}'), ('\u{1e73}',
-        '\u{1e73}'), ('\u{1e75}', '\u{1e75}'), ('\u{1e77}', '\u{1e77}'),
-        ('\u{1e79}', '\u{1e79}'), ('\u{1e7b}', '\u{1e7b}'), ('\u{1e7d}',
-        '\u{1e7d}'), ('\u{1e7f}', '\u{1e7f}'), ('\u{1e81}', '\u{1e81}'),
-        ('\u{1e83}', '\u{1e83}'), ('\u{1e85}', '\u{1e85}'), ('\u{1e87}',
-        '\u{1e87}'), ('\u{1e89}', '\u{1e89}'), ('\u{1e8b}', '\u{1e8b}'),
-        ('\u{1e8d}', '\u{1e8d}'), ('\u{1e8f}', '\u{1e8f}'), ('\u{1e91}',
-        '\u{1e91}'), ('\u{1e93}', '\u{1e93}'), ('\u{1e95}', '\u{1e9d}'),
-        ('\u{1e9f}', '\u{1e9f}'), ('\u{1ea1}', '\u{1ea1}'), ('\u{1ea3}',
-        '\u{1ea3}'), ('\u{1ea5}', '\u{1ea5}'), ('\u{1ea7}', '\u{1ea7}'),
-        ('\u{1ea9}', '\u{1ea9}'), ('\u{1eab}', '\u{1eab}'), ('\u{1ead}',
-        '\u{1ead}'), ('\u{1eaf}', '\u{1eaf}'), ('\u{1eb1}', '\u{1eb1}'),
-        ('\u{1eb3}', '\u{1eb3}'), ('\u{1eb5}', '\u{1eb5}'), ('\u{1eb7}',
-        '\u{1eb7}'), ('\u{1eb9}', '\u{1eb9}'), ('\u{1ebb}', '\u{1ebb}'),
-        ('\u{1ebd}', '\u{1ebd}'), ('\u{1ebf}', '\u{1ebf}'), ('\u{1ec1}',
-        '\u{1ec1}'), ('\u{1ec3}', '\u{1ec3}'), ('\u{1ec5}', '\u{1ec5}'),
-        ('\u{1ec7}', '\u{1ec7}'), ('\u{1ec9}', '\u{1ec9}'), ('\u{1ecb}',
-        '\u{1ecb}'), ('\u{1ecd}', '\u{1ecd}'), ('\u{1ecf}', '\u{1ecf}'),
-        ('\u{1ed1}', '\u{1ed1}'), ('\u{1ed3}', '\u{1ed3}'), ('\u{1ed5}',
-        '\u{1ed5}'), ('\u{1ed7}', '\u{1ed7}'), ('\u{1ed9}', '\u{1ed9}'),
-        ('\u{1edb}', '\u{1edb}'), ('\u{1edd}', '\u{1edd}'), ('\u{1edf}',
-        '\u{1edf}'), ('\u{1ee1}', '\u{1ee1}'), ('\u{1ee3}', '\u{1ee3}'),
-        ('\u{1ee5}', '\u{1ee5}'), ('\u{1ee7}', '\u{1ee7}'), ('\u{1ee9}',
-        '\u{1ee9}'), ('\u{1eeb}', '\u{1eeb}'), ('\u{1eed}', '\u{1eed}'),
-        ('\u{1eef}', '\u{1eef}'), ('\u{1ef1}', '\u{1ef1}'), ('\u{1ef3}',
-        '\u{1ef3}'), ('\u{1ef5}', '\u{1ef5}'), ('\u{1ef7}', '\u{1ef7}'),
-        ('\u{1ef9}', '\u{1ef9}'), ('\u{1efb}', '\u{1efb}'), ('\u{1efd}',
-        '\u{1efd}'), ('\u{1eff}', '\u{1f07}'), ('\u{1f10}', '\u{1f15}'),
-        ('\u{1f20}', '\u{1f27}'), ('\u{1f30}', '\u{1f37}'), ('\u{1f40}',
-        '\u{1f45}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f60}', '\u{1f67}'),
-        ('\u{1f70}', '\u{1f7d}'), ('\u{1f80}', '\u{1f87}'), ('\u{1f90}',
-        '\u{1f97}'), ('\u{1fa0}', '\u{1fa7}'), ('\u{1fb0}', '\u{1fb4}'),
-        ('\u{1fb6}', '\u{1fb7}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}',
-        '\u{1fc4}'), ('\u{1fc6}', '\u{1fc7}'), ('\u{1fd0}', '\u{1fd3}'),
-        ('\u{1fd6}', '\u{1fd7}'), ('\u{1fe0}', '\u{1fe7}'), ('\u{1ff2}',
-        '\u{1ff4}'), ('\u{1ff6}', '\u{1ff7}'), ('\u{210a}', '\u{210a}'),
-        ('\u{210e}', '\u{210f}'), ('\u{2113}', '\u{2113}'), ('\u{212f}',
-        '\u{212f}'), ('\u{2134}', '\u{2134}'), ('\u{2139}', '\u{2139}'),
-        ('\u{213c}', '\u{213d}'), ('\u{2146}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2184}', '\u{2184}'), ('\u{2c30}', '\u{2c5e}'),
-        ('\u{2c61}', '\u{2c61}'), ('\u{2c65}', '\u{2c66}'), ('\u{2c68}',
-        '\u{2c68}'), ('\u{2c6a}', '\u{2c6a}'), ('\u{2c6c}', '\u{2c6c}'),
-        ('\u{2c71}', '\u{2c71}'), ('\u{2c73}', '\u{2c74}'), ('\u{2c76}',
-        '\u{2c7b}'), ('\u{2c81}', '\u{2c81}'), ('\u{2c83}', '\u{2c83}'),
-        ('\u{2c85}', '\u{2c85}'), ('\u{2c87}', '\u{2c87}'), ('\u{2c89}',
-        '\u{2c89}'), ('\u{2c8b}', '\u{2c8b}'), ('\u{2c8d}', '\u{2c8d}'),
-        ('\u{2c8f}', '\u{2c8f}'), ('\u{2c91}', '\u{2c91}'), ('\u{2c93}',
-        '\u{2c93}'), ('\u{2c95}', '\u{2c95}'), ('\u{2c97}', '\u{2c97}'),
-        ('\u{2c99}', '\u{2c99}'), ('\u{2c9b}', '\u{2c9b}'), ('\u{2c9d}',
-        '\u{2c9d}'), ('\u{2c9f}', '\u{2c9f}'), ('\u{2ca1}', '\u{2ca1}'),
-        ('\u{2ca3}', '\u{2ca3}'), ('\u{2ca5}', '\u{2ca5}'), ('\u{2ca7}',
-        '\u{2ca7}'), ('\u{2ca9}', '\u{2ca9}'), ('\u{2cab}', '\u{2cab}'),
-        ('\u{2cad}', '\u{2cad}'), ('\u{2caf}', '\u{2caf}'), ('\u{2cb1}',
-        '\u{2cb1}'), ('\u{2cb3}', '\u{2cb3}'), ('\u{2cb5}', '\u{2cb5}'),
-        ('\u{2cb7}', '\u{2cb7}'), ('\u{2cb9}', '\u{2cb9}'), ('\u{2cbb}',
-        '\u{2cbb}'), ('\u{2cbd}', '\u{2cbd}'), ('\u{2cbf}', '\u{2cbf}'),
-        ('\u{2cc1}', '\u{2cc1}'), ('\u{2cc3}', '\u{2cc3}'), ('\u{2cc5}',
-        '\u{2cc5}'), ('\u{2cc7}', '\u{2cc7}'), ('\u{2cc9}', '\u{2cc9}'),
-        ('\u{2ccb}', '\u{2ccb}'), ('\u{2ccd}', '\u{2ccd}'), ('\u{2ccf}',
-        '\u{2ccf}'), ('\u{2cd1}', '\u{2cd1}'), ('\u{2cd3}', '\u{2cd3}'),
-        ('\u{2cd5}', '\u{2cd5}'), ('\u{2cd7}', '\u{2cd7}'), ('\u{2cd9}',
-        '\u{2cd9}'), ('\u{2cdb}', '\u{2cdb}'), ('\u{2cdd}', '\u{2cdd}'),
-        ('\u{2cdf}', '\u{2cdf}'), ('\u{2ce1}', '\u{2ce1}'), ('\u{2ce3}',
-        '\u{2ce4}'), ('\u{2cec}', '\u{2cec}'), ('\u{2cee}', '\u{2cee}'),
-        ('\u{2cf3}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), ('\u{2d27}',
-        '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{a641}', '\u{a641}'),
-        ('\u{a643}', '\u{a643}'), ('\u{a645}', '\u{a645}'), ('\u{a647}',
-        '\u{a647}'), ('\u{a649}', '\u{a649}'), ('\u{a64b}', '\u{a64b}'),
-        ('\u{a64d}', '\u{a64d}'), ('\u{a64f}', '\u{a64f}'), ('\u{a651}',
-        '\u{a651}'), ('\u{a653}', '\u{a653}'), ('\u{a655}', '\u{a655}'),
-        ('\u{a657}', '\u{a657}'), ('\u{a659}', '\u{a659}'), ('\u{a65b}',
-        '\u{a65b}'), ('\u{a65d}', '\u{a65d}'), ('\u{a65f}', '\u{a65f}'),
-        ('\u{a661}', '\u{a661}'), ('\u{a663}', '\u{a663}'), ('\u{a665}',
-        '\u{a665}'), ('\u{a667}', '\u{a667}'), ('\u{a669}', '\u{a669}'),
-        ('\u{a66b}', '\u{a66b}'), ('\u{a66d}', '\u{a66d}'), ('\u{a681}',
-        '\u{a681}'), ('\u{a683}', '\u{a683}'), ('\u{a685}', '\u{a685}'),
-        ('\u{a687}', '\u{a687}'), ('\u{a689}', '\u{a689}'), ('\u{a68b}',
-        '\u{a68b}'), ('\u{a68d}', '\u{a68d}'), ('\u{a68f}', '\u{a68f}'),
-        ('\u{a691}', '\u{a691}'), ('\u{a693}', '\u{a693}'), ('\u{a695}',
-        '\u{a695}'), ('\u{a697}', '\u{a697}'), ('\u{a699}', '\u{a699}'),
-        ('\u{a69b}', '\u{a69b}'), ('\u{a723}', '\u{a723}'), ('\u{a725}',
-        '\u{a725}'), ('\u{a727}', '\u{a727}'), ('\u{a729}', '\u{a729}'),
-        ('\u{a72b}', '\u{a72b}'), ('\u{a72d}', '\u{a72d}'), ('\u{a72f}',
-        '\u{a731}'), ('\u{a733}', '\u{a733}'), ('\u{a735}', '\u{a735}'),
-        ('\u{a737}', '\u{a737}'), ('\u{a739}', '\u{a739}'), ('\u{a73b}',
-        '\u{a73b}'), ('\u{a73d}', '\u{a73d}'), ('\u{a73f}', '\u{a73f}'),
-        ('\u{a741}', '\u{a741}'), ('\u{a743}', '\u{a743}'), ('\u{a745}',
-        '\u{a745}'), ('\u{a747}', '\u{a747}'), ('\u{a749}', '\u{a749}'),
-        ('\u{a74b}', '\u{a74b}'), ('\u{a74d}', '\u{a74d}'), ('\u{a74f}',
-        '\u{a74f}'), ('\u{a751}', '\u{a751}'), ('\u{a753}', '\u{a753}'),
-        ('\u{a755}', '\u{a755}'), ('\u{a757}', '\u{a757}'), ('\u{a759}',
-        '\u{a759}'), ('\u{a75b}', '\u{a75b}'), ('\u{a75d}', '\u{a75d}'),
-        ('\u{a75f}', '\u{a75f}'), ('\u{a761}', '\u{a761}'), ('\u{a763}',
-        '\u{a763}'), ('\u{a765}', '\u{a765}'), ('\u{a767}', '\u{a767}'),
-        ('\u{a769}', '\u{a769}'), ('\u{a76b}', '\u{a76b}'), ('\u{a76d}',
-        '\u{a76d}'), ('\u{a76f}', '\u{a76f}'), ('\u{a771}', '\u{a778}'),
-        ('\u{a77a}', '\u{a77a}'), ('\u{a77c}', '\u{a77c}'), ('\u{a77f}',
-        '\u{a77f}'), ('\u{a781}', '\u{a781}'), ('\u{a783}', '\u{a783}'),
-        ('\u{a785}', '\u{a785}'), ('\u{a787}', '\u{a787}'), ('\u{a78c}',
-        '\u{a78c}'), ('\u{a78e}', '\u{a78e}'), ('\u{a791}', '\u{a791}'),
-        ('\u{a793}', '\u{a795}'), ('\u{a797}', '\u{a797}'), ('\u{a799}',
-        '\u{a799}'), ('\u{a79b}', '\u{a79b}'), ('\u{a79d}', '\u{a79d}'),
-        ('\u{a79f}', '\u{a79f}'), ('\u{a7a1}', '\u{a7a1}'), ('\u{a7a3}',
-        '\u{a7a3}'), ('\u{a7a5}', '\u{a7a5}'), ('\u{a7a7}', '\u{a7a7}'),
-        ('\u{a7a9}', '\u{a7a9}'), ('\u{a7b5}', '\u{a7b5}'), ('\u{a7b7}',
-        '\u{a7b7}'), ('\u{a7fa}', '\u{a7fa}'), ('\u{ab30}', '\u{ab5a}'),
-        ('\u{ab60}', '\u{ab65}'), ('\u{ab70}', '\u{abbf}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{10428}', '\u{1044f}'), ('\u{104d8}', '\u{104fb}'), ('\u{10cc0}',
-        '\u{10cf2}'), ('\u{118c0}', '\u{118df}'), ('\u{1d41a}', '\u{1d433}'),
-        ('\u{1d44e}', '\u{1d454}'), ('\u{1d456}', '\u{1d467}'), ('\u{1d482}',
-        '\u{1d49b}'), ('\u{1d4b6}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'),
-        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d4cf}'), ('\u{1d4ea}',
-        '\u{1d503}'), ('\u{1d51e}', '\u{1d537}'), ('\u{1d552}', '\u{1d56b}'),
-        ('\u{1d586}', '\u{1d59f}'), ('\u{1d5ba}', '\u{1d5d3}'), ('\u{1d5ee}',
-        '\u{1d607}'), ('\u{1d622}', '\u{1d63b}'), ('\u{1d656}', '\u{1d66f}'),
-        ('\u{1d68a}', '\u{1d6a5}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}',
-        '\u{1d6e1}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}', '\u{1d71b}'),
-        ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d755}'), ('\u{1d770}',
-        '\u{1d788}'), ('\u{1d78a}', '\u{1d78f}'), ('\u{1d7aa}', '\u{1d7c2}'),
-        ('\u{1d7c4}', '\u{1d7c9}'), ('\u{1d7cb}', '\u{1d7cb}'), ('\u{1e922}',
-        '\u{1e943}')
-    ];
-
-    pub const Lm_table: &'static [(char, char)] = &[
-        ('\u{2b0}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'), ('\u{374}', '\u{374}'),
-        ('\u{37a}', '\u{37a}'), ('\u{559}', '\u{559}'), ('\u{640}', '\u{640}'),
-        ('\u{6e5}', '\u{6e6}'), ('\u{7f4}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'),
-        ('\u{81a}', '\u{81a}'), ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'),
-        ('\u{971}', '\u{971}'), ('\u{e46}', '\u{e46}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{10fc}', '\u{10fc}'), ('\u{17d7}', '\u{17d7}'), ('\u{1843}',
-        '\u{1843}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1c78}', '\u{1c7d}'),
-        ('\u{1d2c}', '\u{1d6a}'), ('\u{1d78}', '\u{1d78}'), ('\u{1d9b}',
-        '\u{1dbf}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'),
-        ('\u{2090}', '\u{209c}'), ('\u{2c7c}', '\u{2c7d}'), ('\u{2d6f}',
-        '\u{2d6f}'), ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}', '\u{3005}'),
-        ('\u{3031}', '\u{3035}'), ('\u{303b}', '\u{303b}'), ('\u{309d}',
-        '\u{309e}'), ('\u{30fc}', '\u{30fe}'), ('\u{a015}', '\u{a015}'),
-        ('\u{a4f8}', '\u{a4fd}'), ('\u{a60c}', '\u{a60c}'), ('\u{a67f}',
-        '\u{a67f}'), ('\u{a69c}', '\u{a69d}'), ('\u{a717}', '\u{a71f}'),
-        ('\u{a770}', '\u{a770}'), ('\u{a788}', '\u{a788}'), ('\u{a7f8}',
-        '\u{a7f9}'), ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e6}', '\u{a9e6}'),
-        ('\u{aa70}', '\u{aa70}'), ('\u{aadd}', '\u{aadd}'), ('\u{aaf3}',
-        '\u{aaf4}'), ('\u{ab5c}', '\u{ab5f}'), ('\u{ff70}', '\u{ff70}'),
-        ('\u{ff9e}', '\u{ff9f}'), ('\u{16b40}', '\u{16b43}'), ('\u{16f93}',
-        '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}')
-    ];
-
-    pub const Lo_table: &'static [(char, char)] = &[
-        ('\u{aa}', '\u{aa}'), ('\u{ba}', '\u{ba}'), ('\u{1bb}', '\u{1bb}'),
-        ('\u{1c0}', '\u{1c3}'), ('\u{294}', '\u{294}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{63f}'), ('\u{641}', '\u{64a}'),
-        ('\u{66e}', '\u{66f}'), ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{800}', '\u{815}'),
-        ('\u{840}', '\u{858}'), ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'),
-        ('\u{8b6}', '\u{8bd}'), ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'),
-        ('\u{950}', '\u{950}'), ('\u{958}', '\u{961}'), ('\u{972}', '\u{980}'),
-        ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'),
-        ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'),
-        ('\u{9bd}', '\u{9bd}'), ('\u{9ce}', '\u{9ce}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e1}'), ('\u{9f0}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'),
-        ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'),
-        ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'),
-        ('\u{a38}', '\u{a39}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a72}', '\u{a74}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'),
-        ('\u{ae0}', '\u{ae1}'), ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'),
-        ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'),
-        ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'),
-        ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'),
-        ('\u{b83}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'),
-        ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'),
-        ('\u{c3d}', '\u{c3d}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'),
-        ('\u{c80}', '\u{c80}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
-        ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'),
-        ('\u{cbd}', '\u{cbd}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce1}'),
-        ('\u{cf1}', '\u{cf2}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d3d}'), ('\u{d4e}', '\u{d4e}'),
-        ('\u{d54}', '\u{d56}'), ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'),
-        ('\u{e32}', '\u{e33}'), ('\u{e40}', '\u{e45}'), ('\u{e81}', '\u{e82}'),
-        ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'),
-        ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'),
-        ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'),
-        ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb0}'), ('\u{eb2}', '\u{eb3}'),
-        ('\u{ebd}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{edc}', '\u{edf}'),
-        ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f88}', '\u{f8c}'), ('\u{1000}', '\u{102a}'), ('\u{103f}',
-        '\u{103f}'), ('\u{1050}', '\u{1055}'), ('\u{105a}', '\u{105d}'),
-        ('\u{1061}', '\u{1061}'), ('\u{1065}', '\u{1066}'), ('\u{106e}',
-        '\u{1070}'), ('\u{1075}', '\u{1081}'), ('\u{108e}', '\u{108e}'),
-        ('\u{10d0}', '\u{10fa}'), ('\u{10fd}', '\u{1248}'), ('\u{124a}',
-        '\u{124d}'), ('\u{1250}', '\u{1256}'), ('\u{1258}', '\u{1258}'),
-        ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'), ('\u{128a}',
-        '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}', '\u{12b5}'),
-        ('\u{12b8}', '\u{12be}'), ('\u{12c0}', '\u{12c0}'), ('\u{12c2}',
-        '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}', '\u{1310}'),
-        ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{1380}',
-        '\u{138f}'), ('\u{1401}', '\u{166c}'), ('\u{166f}', '\u{167f}'),
-        ('\u{1681}', '\u{169a}'), ('\u{16a0}', '\u{16ea}'), ('\u{16f1}',
-        '\u{16f8}'), ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1711}'),
-        ('\u{1720}', '\u{1731}'), ('\u{1740}', '\u{1751}'), ('\u{1760}',
-        '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1780}', '\u{17b3}'),
-        ('\u{17dc}', '\u{17dc}'), ('\u{1820}', '\u{1842}'), ('\u{1844}',
-        '\u{1877}'), ('\u{1880}', '\u{1884}'), ('\u{1887}', '\u{18a8}'),
-        ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}',
-        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}',
-        '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1b05}', '\u{1b33}'),
-        ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}', '\u{1ba0}'), ('\u{1bae}',
-        '\u{1baf}'), ('\u{1bba}', '\u{1be5}'), ('\u{1c00}', '\u{1c23}'),
-        ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}', '\u{1c77}'), ('\u{1ce9}',
-        '\u{1cec}'), ('\u{1cee}', '\u{1cf1}'), ('\u{1cf5}', '\u{1cf6}'),
-        ('\u{2135}', '\u{2138}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d80}',
-        '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'),
-        ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}',
-        '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'),
-        ('\u{2dd8}', '\u{2dde}'), ('\u{3006}', '\u{3006}'), ('\u{303c}',
-        '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{309f}', '\u{309f}'),
-        ('\u{30a1}', '\u{30fa}'), ('\u{30ff}', '\u{30ff}'), ('\u{3105}',
-        '\u{312e}'), ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'),
-        ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}',
-        '\u{9fea}'), ('\u{a000}', '\u{a014}'), ('\u{a016}', '\u{a48c}'),
-        ('\u{a4d0}', '\u{a4f7}'), ('\u{a500}', '\u{a60b}'), ('\u{a610}',
-        '\u{a61f}'), ('\u{a62a}', '\u{a62b}'), ('\u{a66e}', '\u{a66e}'),
-        ('\u{a6a0}', '\u{a6e5}'), ('\u{a78f}', '\u{a78f}'), ('\u{a7f7}',
-        '\u{a7f7}'), ('\u{a7fb}', '\u{a801}'), ('\u{a803}', '\u{a805}'),
-        ('\u{a807}', '\u{a80a}'), ('\u{a80c}', '\u{a822}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a882}', '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'),
-        ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}',
-        '\u{a925}'), ('\u{a930}', '\u{a946}'), ('\u{a960}', '\u{a97c}'),
-        ('\u{a984}', '\u{a9b2}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e7}',
-        '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'),
-        ('\u{aa40}', '\u{aa42}'), ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}',
-        '\u{aa6f}'), ('\u{aa71}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'),
-        ('\u{aa7e}', '\u{aaaf}'), ('\u{aab1}', '\u{aab1}'), ('\u{aab5}',
-        '\u{aab6}'), ('\u{aab9}', '\u{aabd}'), ('\u{aac0}', '\u{aac0}'),
-        ('\u{aac2}', '\u{aac2}'), ('\u{aadb}', '\u{aadc}'), ('\u{aae0}',
-        '\u{aaea}'), ('\u{aaf2}', '\u{aaf2}'), ('\u{ab01}', '\u{ab06}'),
-        ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), ('\u{ab20}',
-        '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{abc0}', '\u{abe2}'),
-        ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}',
-        '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'),
-        ('\u{fb1d}', '\u{fb1d}'), ('\u{fb1f}', '\u{fb28}'), ('\u{fb2a}',
-        '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'),
-        ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}',
-        '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'),
-        ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
-        '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff66}', '\u{ff6f}'),
-        ('\u{ff71}', '\u{ff9d}'), ('\u{ffa0}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'),
-        ('\u{10300}', '\u{1031f}'), ('\u{1032d}', '\u{10340}'), ('\u{10342}',
-        '\u{10349}'), ('\u{10350}', '\u{10375}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{10450}',
-        '\u{1049d}'), ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'),
-        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}',
-        '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'),
-        ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}',
-        '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'),
-        ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}',
-        '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}', '\u{10939}'),
-        ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'), ('\u{10a00}',
-        '\u{10a00}'), ('\u{10a10}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'),
-        ('\u{10a19}', '\u{10a33}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}',
-        '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'),
-        ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}',
-        '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'),
-        ('\u{11003}', '\u{11037}'), ('\u{11083}', '\u{110af}'), ('\u{110d0}',
-        '\u{110e8}'), ('\u{11103}', '\u{11126}'), ('\u{11150}', '\u{11172}'),
-        ('\u{11176}', '\u{11176}'), ('\u{11183}', '\u{111b2}'), ('\u{111c1}',
-        '\u{111c4}'), ('\u{111da}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'),
-        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1122b}'), ('\u{11280}',
-        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'),
-        ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}',
-        '\u{112de}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}', '\u{11310}'),
-        ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}',
-        '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133d}', '\u{1133d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{1135d}', '\u{11361}'), ('\u{11400}',
-        '\u{11434}'), ('\u{11447}', '\u{1144a}'), ('\u{11480}', '\u{114af}'),
-        ('\u{114c4}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{11580}',
-        '\u{115ae}'), ('\u{115d8}', '\u{115db}'), ('\u{11600}', '\u{1162f}'),
-        ('\u{11644}', '\u{11644}'), ('\u{11680}', '\u{116aa}'), ('\u{11700}',
-        '\u{11719}'), ('\u{118ff}', '\u{118ff}'), ('\u{11a00}', '\u{11a00}'),
-        ('\u{11a0b}', '\u{11a32}'), ('\u{11a3a}', '\u{11a3a}'), ('\u{11a50}',
-        '\u{11a50}'), ('\u{11a5c}', '\u{11a83}'), ('\u{11a86}', '\u{11a89}'),
-        ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', '\u{11c08}'), ('\u{11c0a}',
-        '\u{11c2e}'), ('\u{11c40}', '\u{11c40}'), ('\u{11c72}', '\u{11c8f}'),
-        ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}',
-        '\u{11d30}'), ('\u{11d46}', '\u{11d46}'), ('\u{12000}', '\u{12399}'),
-        ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'), ('\u{14400}',
-        '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'),
-        ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}', '\u{16b2f}'), ('\u{16b63}',
-        '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'),
-        ('\u{16f50}', '\u{16f50}'), ('\u{17000}', '\u{187ec}'), ('\u{18800}',
-        '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), ('\u{1b170}', '\u{1b2fb}'),
-        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}',
-        '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1e800}', '\u{1e8c4}'),
-        ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}',
-        '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'),
-        ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}',
-        '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'),
-        ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}',
-        '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'),
-        ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}',
-        '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'),
-        ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}',
-        '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'),
-        ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}',
-        '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'),
-        ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}',
-        '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'),
-        ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2ceb0}',
-        '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Lt_table: &'static [(char, char)] = &[
-        ('\u{1c5}', '\u{1c5}'), ('\u{1c8}', '\u{1c8}'), ('\u{1cb}', '\u{1cb}'),
-        ('\u{1f2}', '\u{1f2}'), ('\u{1f88}', '\u{1f8f}'), ('\u{1f98}',
-        '\u{1f9f}'), ('\u{1fa8}', '\u{1faf}'), ('\u{1fbc}', '\u{1fbc}'),
-        ('\u{1fcc}', '\u{1fcc}'), ('\u{1ffc}', '\u{1ffc}')
-    ];
-
-    pub const Lu_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{de}'),
-        ('\u{100}', '\u{100}'), ('\u{102}', '\u{102}'), ('\u{104}', '\u{104}'),
-        ('\u{106}', '\u{106}'), ('\u{108}', '\u{108}'), ('\u{10a}', '\u{10a}'),
-        ('\u{10c}', '\u{10c}'), ('\u{10e}', '\u{10e}'), ('\u{110}', '\u{110}'),
-        ('\u{112}', '\u{112}'), ('\u{114}', '\u{114}'), ('\u{116}', '\u{116}'),
-        ('\u{118}', '\u{118}'), ('\u{11a}', '\u{11a}'), ('\u{11c}', '\u{11c}'),
-        ('\u{11e}', '\u{11e}'), ('\u{120}', '\u{120}'), ('\u{122}', '\u{122}'),
-        ('\u{124}', '\u{124}'), ('\u{126}', '\u{126}'), ('\u{128}', '\u{128}'),
-        ('\u{12a}', '\u{12a}'), ('\u{12c}', '\u{12c}'), ('\u{12e}', '\u{12e}'),
-        ('\u{130}', '\u{130}'), ('\u{132}', '\u{132}'), ('\u{134}', '\u{134}'),
-        ('\u{136}', '\u{136}'), ('\u{139}', '\u{139}'), ('\u{13b}', '\u{13b}'),
-        ('\u{13d}', '\u{13d}'), ('\u{13f}', '\u{13f}'), ('\u{141}', '\u{141}'),
-        ('\u{143}', '\u{143}'), ('\u{145}', '\u{145}'), ('\u{147}', '\u{147}'),
-        ('\u{14a}', '\u{14a}'), ('\u{14c}', '\u{14c}'), ('\u{14e}', '\u{14e}'),
-        ('\u{150}', '\u{150}'), ('\u{152}', '\u{152}'), ('\u{154}', '\u{154}'),
-        ('\u{156}', '\u{156}'), ('\u{158}', '\u{158}'), ('\u{15a}', '\u{15a}'),
-        ('\u{15c}', '\u{15c}'), ('\u{15e}', '\u{15e}'), ('\u{160}', '\u{160}'),
-        ('\u{162}', '\u{162}'), ('\u{164}', '\u{164}'), ('\u{166}', '\u{166}'),
-        ('\u{168}', '\u{168}'), ('\u{16a}', '\u{16a}'), ('\u{16c}', '\u{16c}'),
-        ('\u{16e}', '\u{16e}'), ('\u{170}', '\u{170}'), ('\u{172}', '\u{172}'),
-        ('\u{174}', '\u{174}'), ('\u{176}', '\u{176}'), ('\u{178}', '\u{179}'),
-        ('\u{17b}', '\u{17b}'), ('\u{17d}', '\u{17d}'), ('\u{181}', '\u{182}'),
-        ('\u{184}', '\u{184}'), ('\u{186}', '\u{187}'), ('\u{189}', '\u{18b}'),
-        ('\u{18e}', '\u{191}'), ('\u{193}', '\u{194}'), ('\u{196}', '\u{198}'),
-        ('\u{19c}', '\u{19d}'), ('\u{19f}', '\u{1a0}'), ('\u{1a2}', '\u{1a2}'),
-        ('\u{1a4}', '\u{1a4}'), ('\u{1a6}', '\u{1a7}'), ('\u{1a9}', '\u{1a9}'),
-        ('\u{1ac}', '\u{1ac}'), ('\u{1ae}', '\u{1af}'), ('\u{1b1}', '\u{1b3}'),
-        ('\u{1b5}', '\u{1b5}'), ('\u{1b7}', '\u{1b8}'), ('\u{1bc}', '\u{1bc}'),
-        ('\u{1c4}', '\u{1c4}'), ('\u{1c7}', '\u{1c7}'), ('\u{1ca}', '\u{1ca}'),
-        ('\u{1cd}', '\u{1cd}'), ('\u{1cf}', '\u{1cf}'), ('\u{1d1}', '\u{1d1}'),
-        ('\u{1d3}', '\u{1d3}'), ('\u{1d5}', '\u{1d5}'), ('\u{1d7}', '\u{1d7}'),
-        ('\u{1d9}', '\u{1d9}'), ('\u{1db}', '\u{1db}'), ('\u{1de}', '\u{1de}'),
-        ('\u{1e0}', '\u{1e0}'), ('\u{1e2}', '\u{1e2}'), ('\u{1e4}', '\u{1e4}'),
-        ('\u{1e6}', '\u{1e6}'), ('\u{1e8}', '\u{1e8}'), ('\u{1ea}', '\u{1ea}'),
-        ('\u{1ec}', '\u{1ec}'), ('\u{1ee}', '\u{1ee}'), ('\u{1f1}', '\u{1f1}'),
-        ('\u{1f4}', '\u{1f4}'), ('\u{1f6}', '\u{1f8}'), ('\u{1fa}', '\u{1fa}'),
-        ('\u{1fc}', '\u{1fc}'), ('\u{1fe}', '\u{1fe}'), ('\u{200}', '\u{200}'),
-        ('\u{202}', '\u{202}'), ('\u{204}', '\u{204}'), ('\u{206}', '\u{206}'),
-        ('\u{208}', '\u{208}'), ('\u{20a}', '\u{20a}'), ('\u{20c}', '\u{20c}'),
-        ('\u{20e}', '\u{20e}'), ('\u{210}', '\u{210}'), ('\u{212}', '\u{212}'),
-        ('\u{214}', '\u{214}'), ('\u{216}', '\u{216}'), ('\u{218}', '\u{218}'),
-        ('\u{21a}', '\u{21a}'), ('\u{21c}', '\u{21c}'), ('\u{21e}', '\u{21e}'),
-        ('\u{220}', '\u{220}'), ('\u{222}', '\u{222}'), ('\u{224}', '\u{224}'),
-        ('\u{226}', '\u{226}'), ('\u{228}', '\u{228}'), ('\u{22a}', '\u{22a}'),
-        ('\u{22c}', '\u{22c}'), ('\u{22e}', '\u{22e}'), ('\u{230}', '\u{230}'),
-        ('\u{232}', '\u{232}'), ('\u{23a}', '\u{23b}'), ('\u{23d}', '\u{23e}'),
-        ('\u{241}', '\u{241}'), ('\u{243}', '\u{246}'), ('\u{248}', '\u{248}'),
-        ('\u{24a}', '\u{24a}'), ('\u{24c}', '\u{24c}'), ('\u{24e}', '\u{24e}'),
-        ('\u{370}', '\u{370}'), ('\u{372}', '\u{372}'), ('\u{376}', '\u{376}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{38f}'), ('\u{391}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3ab}'), ('\u{3cf}', '\u{3cf}'), ('\u{3d2}', '\u{3d4}'),
-        ('\u{3d8}', '\u{3d8}'), ('\u{3da}', '\u{3da}'), ('\u{3dc}', '\u{3dc}'),
-        ('\u{3de}', '\u{3de}'), ('\u{3e0}', '\u{3e0}'), ('\u{3e2}', '\u{3e2}'),
-        ('\u{3e4}', '\u{3e4}'), ('\u{3e6}', '\u{3e6}'), ('\u{3e8}', '\u{3e8}'),
-        ('\u{3ea}', '\u{3ea}'), ('\u{3ec}', '\u{3ec}'), ('\u{3ee}', '\u{3ee}'),
-        ('\u{3f4}', '\u{3f4}'), ('\u{3f7}', '\u{3f7}'), ('\u{3f9}', '\u{3fa}'),
-        ('\u{3fd}', '\u{42f}'), ('\u{460}', '\u{460}'), ('\u{462}', '\u{462}'),
-        ('\u{464}', '\u{464}'), ('\u{466}', '\u{466}'), ('\u{468}', '\u{468}'),
-        ('\u{46a}', '\u{46a}'), ('\u{46c}', '\u{46c}'), ('\u{46e}', '\u{46e}'),
-        ('\u{470}', '\u{470}'), ('\u{472}', '\u{472}'), ('\u{474}', '\u{474}'),
-        ('\u{476}', '\u{476}'), ('\u{478}', '\u{478}'), ('\u{47a}', '\u{47a}'),
-        ('\u{47c}', '\u{47c}'), ('\u{47e}', '\u{47e}'), ('\u{480}', '\u{480}'),
-        ('\u{48a}', '\u{48a}'), ('\u{48c}', '\u{48c}'), ('\u{48e}', '\u{48e}'),
-        ('\u{490}', '\u{490}'), ('\u{492}', '\u{492}'), ('\u{494}', '\u{494}'),
-        ('\u{496}', '\u{496}'), ('\u{498}', '\u{498}'), ('\u{49a}', '\u{49a}'),
-        ('\u{49c}', '\u{49c}'), ('\u{49e}', '\u{49e}'), ('\u{4a0}', '\u{4a0}'),
-        ('\u{4a2}', '\u{4a2}'), ('\u{4a4}', '\u{4a4}'), ('\u{4a6}', '\u{4a6}'),
-        ('\u{4a8}', '\u{4a8}'), ('\u{4aa}', '\u{4aa}'), ('\u{4ac}', '\u{4ac}'),
-        ('\u{4ae}', '\u{4ae}'), ('\u{4b0}', '\u{4b0}'), ('\u{4b2}', '\u{4b2}'),
-        ('\u{4b4}', '\u{4b4}'), ('\u{4b6}', '\u{4b6}'), ('\u{4b8}', '\u{4b8}'),
-        ('\u{4ba}', '\u{4ba}'), ('\u{4bc}', '\u{4bc}'), ('\u{4be}', '\u{4be}'),
-        ('\u{4c0}', '\u{4c1}'), ('\u{4c3}', '\u{4c3}'), ('\u{4c5}', '\u{4c5}'),
-        ('\u{4c7}', '\u{4c7}'), ('\u{4c9}', '\u{4c9}'), ('\u{4cb}', '\u{4cb}'),
-        ('\u{4cd}', '\u{4cd}'), ('\u{4d0}', '\u{4d0}'), ('\u{4d2}', '\u{4d2}'),
-        ('\u{4d4}', '\u{4d4}'), ('\u{4d6}', '\u{4d6}'), ('\u{4d8}', '\u{4d8}'),
-        ('\u{4da}', '\u{4da}'), ('\u{4dc}', '\u{4dc}'), ('\u{4de}', '\u{4de}'),
-        ('\u{4e0}', '\u{4e0}'), ('\u{4e2}', '\u{4e2}'), ('\u{4e4}', '\u{4e4}'),
-        ('\u{4e6}', '\u{4e6}'), ('\u{4e8}', '\u{4e8}'), ('\u{4ea}', '\u{4ea}'),
-        ('\u{4ec}', '\u{4ec}'), ('\u{4ee}', '\u{4ee}'), ('\u{4f0}', '\u{4f0}'),
-        ('\u{4f2}', '\u{4f2}'), ('\u{4f4}', '\u{4f4}'), ('\u{4f6}', '\u{4f6}'),
-        ('\u{4f8}', '\u{4f8}'), ('\u{4fa}', '\u{4fa}'), ('\u{4fc}', '\u{4fc}'),
-        ('\u{4fe}', '\u{4fe}'), ('\u{500}', '\u{500}'), ('\u{502}', '\u{502}'),
-        ('\u{504}', '\u{504}'), ('\u{506}', '\u{506}'), ('\u{508}', '\u{508}'),
-        ('\u{50a}', '\u{50a}'), ('\u{50c}', '\u{50c}'), ('\u{50e}', '\u{50e}'),
-        ('\u{510}', '\u{510}'), ('\u{512}', '\u{512}'), ('\u{514}', '\u{514}'),
-        ('\u{516}', '\u{516}'), ('\u{518}', '\u{518}'), ('\u{51a}', '\u{51a}'),
-        ('\u{51c}', '\u{51c}'), ('\u{51e}', '\u{51e}'), ('\u{520}', '\u{520}'),
-        ('\u{522}', '\u{522}'), ('\u{524}', '\u{524}'), ('\u{526}', '\u{526}'),
-        ('\u{528}', '\u{528}'), ('\u{52a}', '\u{52a}'), ('\u{52c}', '\u{52c}'),
-        ('\u{52e}', '\u{52e}'), ('\u{531}', '\u{556}'), ('\u{10a0}',
-        '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{1e00}', '\u{1e00}'), ('\u{1e02}',
-        '\u{1e02}'), ('\u{1e04}', '\u{1e04}'), ('\u{1e06}', '\u{1e06}'),
-        ('\u{1e08}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0a}'), ('\u{1e0c}',
-        '\u{1e0c}'), ('\u{1e0e}', '\u{1e0e}'), ('\u{1e10}', '\u{1e10}'),
-        ('\u{1e12}', '\u{1e12}'), ('\u{1e14}', '\u{1e14}'), ('\u{1e16}',
-        '\u{1e16}'), ('\u{1e18}', '\u{1e18}'), ('\u{1e1a}', '\u{1e1a}'),
-        ('\u{1e1c}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1e}'), ('\u{1e20}',
-        '\u{1e20}'), ('\u{1e22}', '\u{1e22}'), ('\u{1e24}', '\u{1e24}'),
-        ('\u{1e26}', '\u{1e26}'), ('\u{1e28}', '\u{1e28}'), ('\u{1e2a}',
-        '\u{1e2a}'), ('\u{1e2c}', '\u{1e2c}'), ('\u{1e2e}', '\u{1e2e}'),
-        ('\u{1e30}', '\u{1e30}'), ('\u{1e32}', '\u{1e32}'), ('\u{1e34}',
-        '\u{1e34}'), ('\u{1e36}', '\u{1e36}'), ('\u{1e38}', '\u{1e38}'),
-        ('\u{1e3a}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3c}'), ('\u{1e3e}',
-        '\u{1e3e}'), ('\u{1e40}', '\u{1e40}'), ('\u{1e42}', '\u{1e42}'),
-        ('\u{1e44}', '\u{1e44}'), ('\u{1e46}', '\u{1e46}'), ('\u{1e48}',
-        '\u{1e48}'), ('\u{1e4a}', '\u{1e4a}'), ('\u{1e4c}', '\u{1e4c}'),
-        ('\u{1e4e}', '\u{1e4e}'), ('\u{1e50}', '\u{1e50}'), ('\u{1e52}',
-        '\u{1e52}'), ('\u{1e54}', '\u{1e54}'), ('\u{1e56}', '\u{1e56}'),
-        ('\u{1e58}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5a}'), ('\u{1e5c}',
-        '\u{1e5c}'), ('\u{1e5e}', '\u{1e5e}'), ('\u{1e60}', '\u{1e60}'),
-        ('\u{1e62}', '\u{1e62}'), ('\u{1e64}', '\u{1e64}'), ('\u{1e66}',
-        '\u{1e66}'), ('\u{1e68}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6a}'),
-        ('\u{1e6c}', '\u{1e6c}'), ('\u{1e6e}', '\u{1e6e}'), ('\u{1e70}',
-        '\u{1e70}'), ('\u{1e72}', '\u{1e72}'), ('\u{1e74}', '\u{1e74}'),
-        ('\u{1e76}', '\u{1e76}'), ('\u{1e78}', '\u{1e78}'), ('\u{1e7a}',
-        '\u{1e7a}'), ('\u{1e7c}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7e}'),
-        ('\u{1e80}', '\u{1e80}'), ('\u{1e82}', '\u{1e82}'), ('\u{1e84}',
-        '\u{1e84}'), ('\u{1e86}', '\u{1e86}'), ('\u{1e88}', '\u{1e88}'),
-        ('\u{1e8a}', '\u{1e8a}'), ('\u{1e8c}', '\u{1e8c}'), ('\u{1e8e}',
-        '\u{1e8e}'), ('\u{1e90}', '\u{1e90}'), ('\u{1e92}', '\u{1e92}'),
-        ('\u{1e94}', '\u{1e94}'), ('\u{1e9e}', '\u{1e9e}'), ('\u{1ea0}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea2}'), ('\u{1ea4}', '\u{1ea4}'),
-        ('\u{1ea6}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea8}'), ('\u{1eaa}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1eac}'), ('\u{1eae}', '\u{1eae}'),
-        ('\u{1eb0}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb2}'), ('\u{1eb4}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb6}'), ('\u{1eb8}', '\u{1eb8}'),
-        ('\u{1eba}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebc}'), ('\u{1ebe}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec0}'), ('\u{1ec2}', '\u{1ec2}'),
-        ('\u{1ec4}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec6}'), ('\u{1ec8}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1eca}'), ('\u{1ecc}', '\u{1ecc}'),
-        ('\u{1ece}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed0}'), ('\u{1ed2}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed4}'), ('\u{1ed6}', '\u{1ed6}'),
-        ('\u{1ed8}', '\u{1ed8}'), ('\u{1eda}', '\u{1eda}'), ('\u{1edc}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1ede}'), ('\u{1ee0}', '\u{1ee0}'),
-        ('\u{1ee2}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee4}'), ('\u{1ee6}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee8}'), ('\u{1eea}', '\u{1eea}'),
-        ('\u{1eec}', '\u{1eec}'), ('\u{1eee}', '\u{1eee}'), ('\u{1ef0}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef2}'), ('\u{1ef4}', '\u{1ef4}'),
-        ('\u{1ef6}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef8}'), ('\u{1efa}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efc}'), ('\u{1efe}', '\u{1efe}'),
-        ('\u{1f08}', '\u{1f0f}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f28}',
-        '\u{1f2f}'), ('\u{1f38}', '\u{1f3f}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}',
-        '\u{1f5d}'), ('\u{1f5f}', '\u{1f5f}'), ('\u{1f68}', '\u{1f6f}'),
-        ('\u{1fb8}', '\u{1fbb}'), ('\u{1fc8}', '\u{1fcb}'), ('\u{1fd8}',
-        '\u{1fdb}'), ('\u{1fe8}', '\u{1fec}'), ('\u{1ff8}', '\u{1ffb}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210b}',
-        '\u{210d}'), ('\u{2110}', '\u{2112}'), ('\u{2115}', '\u{2115}'),
-        ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}',
-        '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'),
-        ('\u{2130}', '\u{2133}'), ('\u{213e}', '\u{213f}'), ('\u{2145}',
-        '\u{2145}'), ('\u{2183}', '\u{2183}'), ('\u{2c00}', '\u{2c2e}'),
-        ('\u{2c60}', '\u{2c60}'), ('\u{2c62}', '\u{2c64}'), ('\u{2c67}',
-        '\u{2c67}'), ('\u{2c69}', '\u{2c69}'), ('\u{2c6b}', '\u{2c6b}'),
-        ('\u{2c6d}', '\u{2c70}'), ('\u{2c72}', '\u{2c72}'), ('\u{2c75}',
-        '\u{2c75}'), ('\u{2c7e}', '\u{2c80}'), ('\u{2c82}', '\u{2c82}'),
-        ('\u{2c84}', '\u{2c84}'), ('\u{2c86}', '\u{2c86}'), ('\u{2c88}',
-        '\u{2c88}'), ('\u{2c8a}', '\u{2c8a}'), ('\u{2c8c}', '\u{2c8c}'),
-        ('\u{2c8e}', '\u{2c8e}'), ('\u{2c90}', '\u{2c90}'), ('\u{2c92}',
-        '\u{2c92}'), ('\u{2c94}', '\u{2c94}'), ('\u{2c96}', '\u{2c96}'),
-        ('\u{2c98}', '\u{2c98}'), ('\u{2c9a}', '\u{2c9a}'), ('\u{2c9c}',
-        '\u{2c9c}'), ('\u{2c9e}', '\u{2c9e}'), ('\u{2ca0}', '\u{2ca0}'),
-        ('\u{2ca2}', '\u{2ca2}'), ('\u{2ca4}', '\u{2ca4}'), ('\u{2ca6}',
-        '\u{2ca6}'), ('\u{2ca8}', '\u{2ca8}'), ('\u{2caa}', '\u{2caa}'),
-        ('\u{2cac}', '\u{2cac}'), ('\u{2cae}', '\u{2cae}'), ('\u{2cb0}',
-        '\u{2cb0}'), ('\u{2cb2}', '\u{2cb2}'), ('\u{2cb4}', '\u{2cb4}'),
-        ('\u{2cb6}', '\u{2cb6}'), ('\u{2cb8}', '\u{2cb8}'), ('\u{2cba}',
-        '\u{2cba}'), ('\u{2cbc}', '\u{2cbc}'), ('\u{2cbe}', '\u{2cbe}'),
-        ('\u{2cc0}', '\u{2cc0}'), ('\u{2cc2}', '\u{2cc2}'), ('\u{2cc4}',
-        '\u{2cc4}'), ('\u{2cc6}', '\u{2cc6}'), ('\u{2cc8}', '\u{2cc8}'),
-        ('\u{2cca}', '\u{2cca}'), ('\u{2ccc}', '\u{2ccc}'), ('\u{2cce}',
-        '\u{2cce}'), ('\u{2cd0}', '\u{2cd0}'), ('\u{2cd2}', '\u{2cd2}'),
-        ('\u{2cd4}', '\u{2cd4}'), ('\u{2cd6}', '\u{2cd6}'), ('\u{2cd8}',
-        '\u{2cd8}'), ('\u{2cda}', '\u{2cda}'), ('\u{2cdc}', '\u{2cdc}'),
-        ('\u{2cde}', '\u{2cde}'), ('\u{2ce0}', '\u{2ce0}'), ('\u{2ce2}',
-        '\u{2ce2}'), ('\u{2ceb}', '\u{2ceb}'), ('\u{2ced}', '\u{2ced}'),
-        ('\u{2cf2}', '\u{2cf2}'), ('\u{a640}', '\u{a640}'), ('\u{a642}',
-        '\u{a642}'), ('\u{a644}', '\u{a644}'), ('\u{a646}', '\u{a646}'),
-        ('\u{a648}', '\u{a648}'), ('\u{a64a}', '\u{a64a}'), ('\u{a64c}',
-        '\u{a64c}'), ('\u{a64e}', '\u{a64e}'), ('\u{a650}', '\u{a650}'),
-        ('\u{a652}', '\u{a652}'), ('\u{a654}', '\u{a654}'), ('\u{a656}',
-        '\u{a656}'), ('\u{a658}', '\u{a658}'), ('\u{a65a}', '\u{a65a}'),
-        ('\u{a65c}', '\u{a65c}'), ('\u{a65e}', '\u{a65e}'), ('\u{a660}',
-        '\u{a660}'), ('\u{a662}', '\u{a662}'), ('\u{a664}', '\u{a664}'),
-        ('\u{a666}', '\u{a666}'), ('\u{a668}', '\u{a668}'), ('\u{a66a}',
-        '\u{a66a}'), ('\u{a66c}', '\u{a66c}'), ('\u{a680}', '\u{a680}'),
-        ('\u{a682}', '\u{a682}'), ('\u{a684}', '\u{a684}'), ('\u{a686}',
-        '\u{a686}'), ('\u{a688}', '\u{a688}'), ('\u{a68a}', '\u{a68a}'),
-        ('\u{a68c}', '\u{a68c}'), ('\u{a68e}', '\u{a68e}'), ('\u{a690}',
-        '\u{a690}'), ('\u{a692}', '\u{a692}'), ('\u{a694}', '\u{a694}'),
-        ('\u{a696}', '\u{a696}'), ('\u{a698}', '\u{a698}'), ('\u{a69a}',
-        '\u{a69a}'), ('\u{a722}', '\u{a722}'), ('\u{a724}', '\u{a724}'),
-        ('\u{a726}', '\u{a726}'), ('\u{a728}', '\u{a728}'), ('\u{a72a}',
-        '\u{a72a}'), ('\u{a72c}', '\u{a72c}'), ('\u{a72e}', '\u{a72e}'),
-        ('\u{a732}', '\u{a732}'), ('\u{a734}', '\u{a734}'), ('\u{a736}',
-        '\u{a736}'), ('\u{a738}', '\u{a738}'), ('\u{a73a}', '\u{a73a}'),
-        ('\u{a73c}', '\u{a73c}'), ('\u{a73e}', '\u{a73e}'), ('\u{a740}',
-        '\u{a740}'), ('\u{a742}', '\u{a742}'), ('\u{a744}', '\u{a744}'),
-        ('\u{a746}', '\u{a746}'), ('\u{a748}', '\u{a748}'), ('\u{a74a}',
-        '\u{a74a}'), ('\u{a74c}', '\u{a74c}'), ('\u{a74e}', '\u{a74e}'),
-        ('\u{a750}', '\u{a750}'), ('\u{a752}', '\u{a752}'), ('\u{a754}',
-        '\u{a754}'), ('\u{a756}', '\u{a756}'), ('\u{a758}', '\u{a758}'),
-        ('\u{a75a}', '\u{a75a}'), ('\u{a75c}', '\u{a75c}'), ('\u{a75e}',
-        '\u{a75e}'), ('\u{a760}', '\u{a760}'), ('\u{a762}', '\u{a762}'),
-        ('\u{a764}', '\u{a764}'), ('\u{a766}', '\u{a766}'), ('\u{a768}',
-        '\u{a768}'), ('\u{a76a}', '\u{a76a}'), ('\u{a76c}', '\u{a76c}'),
-        ('\u{a76e}', '\u{a76e}'), ('\u{a779}', '\u{a779}'), ('\u{a77b}',
-        '\u{a77b}'), ('\u{a77d}', '\u{a77e}'), ('\u{a780}', '\u{a780}'),
-        ('\u{a782}', '\u{a782}'), ('\u{a784}', '\u{a784}'), ('\u{a786}',
-        '\u{a786}'), ('\u{a78b}', '\u{a78b}'), ('\u{a78d}', '\u{a78d}'),
-        ('\u{a790}', '\u{a790}'), ('\u{a792}', '\u{a792}'), ('\u{a796}',
-        '\u{a796}'), ('\u{a798}', '\u{a798}'), ('\u{a79a}', '\u{a79a}'),
-        ('\u{a79c}', '\u{a79c}'), ('\u{a79e}', '\u{a79e}'), ('\u{a7a0}',
-        '\u{a7a0}'), ('\u{a7a2}', '\u{a7a2}'), ('\u{a7a4}', '\u{a7a4}'),
-        ('\u{a7a6}', '\u{a7a6}'), ('\u{a7a8}', '\u{a7a8}'), ('\u{a7aa}',
-        '\u{a7ae}'), ('\u{a7b0}', '\u{a7b4}'), ('\u{a7b6}', '\u{a7b6}'),
-        ('\u{ff21}', '\u{ff3a}'), ('\u{10400}', '\u{10427}'), ('\u{104b0}',
-        '\u{104d3}'), ('\u{10c80}', '\u{10cb2}'), ('\u{118a0}', '\u{118bf}'),
-        ('\u{1d400}', '\u{1d419}'), ('\u{1d434}', '\u{1d44d}'), ('\u{1d468}',
-        '\u{1d481}'), ('\u{1d49c}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'),
-        ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}',
-        '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b5}'), ('\u{1d4d0}', '\u{1d4e9}'),
-        ('\u{1d504}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}',
-        '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d538}', '\u{1d539}'),
-        ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}',
-        '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d56c}', '\u{1d585}'),
-        ('\u{1d5a0}', '\u{1d5b9}'), ('\u{1d5d4}', '\u{1d5ed}'), ('\u{1d608}',
-        '\u{1d621}'), ('\u{1d63c}', '\u{1d655}'), ('\u{1d670}', '\u{1d689}'),
-        ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6e2}', '\u{1d6fa}'), ('\u{1d71c}',
-        '\u{1d734}'), ('\u{1d756}', '\u{1d76e}'), ('\u{1d790}', '\u{1d7a8}'),
-        ('\u{1d7ca}', '\u{1d7ca}'), ('\u{1e900}', '\u{1e921}')
-    ];
-
-    pub const M_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8d4}', '\u{8e1}'),
-        ('\u{8e3}', '\u{903}'), ('\u{93a}', '\u{93c}'), ('\u{93e}', '\u{94f}'),
-        ('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'), ('\u{981}', '\u{983}'),
-        ('\u{9bc}', '\u{9bc}'), ('\u{9be}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9cd}'), ('\u{9d7}', '\u{9d7}'), ('\u{9e2}', '\u{9e3}'),
-        ('\u{a01}', '\u{a03}'), ('\u{a3c}', '\u{a3c}'), ('\u{a3e}', '\u{a42}'),
-        ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'),
-        ('\u{a70}', '\u{a71}'), ('\u{a75}', '\u{a75}'), ('\u{a81}', '\u{a83}'),
-        ('\u{abc}', '\u{abc}'), ('\u{abe}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'),
-        ('\u{acb}', '\u{acd}'), ('\u{ae2}', '\u{ae3}'), ('\u{afa}', '\u{aff}'),
-        ('\u{b01}', '\u{b03}'), ('\u{b3c}', '\u{b3c}'), ('\u{b3e}', '\u{b44}'),
-        ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'),
-        ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'), ('\u{bbe}', '\u{bc2}'),
-        ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{c00}', '\u{c03}'), ('\u{c3e}', '\u{c44}'), ('\u{c46}', '\u{c48}'),
-        ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'),
-        ('\u{c81}', '\u{c83}'), ('\u{cbc}', '\u{cbc}'), ('\u{cbe}', '\u{cc4}'),
-        ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'),
-        ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d03}'), ('\u{d3b}', '\u{d3c}'),
-        ('\u{d3e}', '\u{d44}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4d}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d62}', '\u{d63}'), ('\u{d82}', '\u{d83}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'), ('\u{e31}', '\u{e31}'),
-        ('\u{e34}', '\u{e3a}'), ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'),
-        ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'),
-        ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'),
-        ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f3f}'), ('\u{f71}', '\u{f84}'),
-        ('\u{f86}', '\u{f87}'), ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'),
-        ('\u{fc6}', '\u{fc6}'), ('\u{102b}', '\u{103e}'), ('\u{1056}',
-        '\u{1059}'), ('\u{105e}', '\u{1060}'), ('\u{1062}', '\u{1064}'),
-        ('\u{1067}', '\u{106d}'), ('\u{1071}', '\u{1074}'), ('\u{1082}',
-        '\u{108d}'), ('\u{108f}', '\u{108f}'), ('\u{109a}', '\u{109d}'),
-        ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), ('\u{1732}',
-        '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'),
-        ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}',
-        '\u{180d}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'),
-        ('\u{1920}', '\u{192b}'), ('\u{1930}', '\u{193b}'), ('\u{1a17}',
-        '\u{1a1b}'), ('\u{1a55}', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'),
-        ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abe}'), ('\u{1b00}',
-        '\u{1b04}'), ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'),
-        ('\u{1b80}', '\u{1b82}'), ('\u{1ba1}', '\u{1bad}'), ('\u{1be6}',
-        '\u{1bf3}'), ('\u{1c24}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'),
-        ('\u{1cd4}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf2}',
-        '\u{1cf4}'), ('\u{1cf7}', '\u{1cf9}'), ('\u{1dc0}', '\u{1df9}'),
-        ('\u{1dfb}', '\u{1dff}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}',
-        '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{302a}', '\u{302f}'), ('\u{3099}', '\u{309a}'), ('\u{a66f}',
-        '\u{a672}'), ('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'),
-        ('\u{a6f0}', '\u{a6f1}'), ('\u{a802}', '\u{a802}'), ('\u{a806}',
-        '\u{a806}'), ('\u{a80b}', '\u{a80b}'), ('\u{a823}', '\u{a827}'),
-        ('\u{a880}', '\u{a881}'), ('\u{a8b4}', '\u{a8c5}'), ('\u{a8e0}',
-        '\u{a8f1}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a953}'),
-        ('\u{a980}', '\u{a983}'), ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}',
-        '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'),
-        ('\u{aa4c}', '\u{aa4d}'), ('\u{aa7b}', '\u{aa7d}'), ('\u{aab0}',
-        '\u{aab0}'), ('\u{aab2}', '\u{aab4}'), ('\u{aab7}', '\u{aab8}'),
-        ('\u{aabe}', '\u{aabf}'), ('\u{aac1}', '\u{aac1}'), ('\u{aaeb}',
-        '\u{aaef}'), ('\u{aaf5}', '\u{aaf6}'), ('\u{abe3}', '\u{abea}'),
-        ('\u{abec}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), ('\u{fe00}',
-        '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('\u{101fd}', '\u{101fd}'),
-        ('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'), ('\u{10a01}',
-        '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'),
-        ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}',
-        '\u{10ae6}'), ('\u{11000}', '\u{11002}'), ('\u{11038}', '\u{11046}'),
-        ('\u{1107f}', '\u{11082}'), ('\u{110b0}', '\u{110ba}'), ('\u{11100}',
-        '\u{11102}'), ('\u{11127}', '\u{11134}'), ('\u{11173}', '\u{11173}'),
-        ('\u{11180}', '\u{11182}'), ('\u{111b3}', '\u{111c0}'), ('\u{111ca}',
-        '\u{111cc}'), ('\u{1122c}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'),
-        ('\u{112df}', '\u{112ea}'), ('\u{11300}', '\u{11303}'), ('\u{1133c}',
-        '\u{1133c}'), ('\u{1133e}', '\u{11344}'), ('\u{11347}', '\u{11348}'),
-        ('\u{1134b}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11362}',
-        '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
-        ('\u{11435}', '\u{11446}'), ('\u{114b0}', '\u{114c3}'), ('\u{115af}',
-        '\u{115b5}'), ('\u{115b8}', '\u{115c0}'), ('\u{115dc}', '\u{115dd}'),
-        ('\u{11630}', '\u{11640}'), ('\u{116ab}', '\u{116b7}'), ('\u{1171d}',
-        '\u{1172b}'), ('\u{11a01}', '\u{11a0a}'), ('\u{11a33}', '\u{11a39}'),
-        ('\u{11a3b}', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'), ('\u{11a51}',
-        '\u{11a5b}'), ('\u{11a8a}', '\u{11a99}'), ('\u{11c2f}', '\u{11c36}'),
-        ('\u{11c38}', '\u{11c3f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}',
-        '\u{11cb6}'), ('\u{11d31}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'),
-        ('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', '\u{11d45}'), ('\u{11d47}',
-        '\u{11d47}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'),
-        ('\u{16f51}', '\u{16f7e}'), ('\u{16f8f}', '\u{16f92}'), ('\u{1bc9d}',
-        '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}', '\u{1d172}'),
-        ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}',
-        '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'),
-        ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}',
-        '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'),
-        ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}',
-        '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'),
-        ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}',
-        '\u{e01ef}')
-    ];
-
-    pub const Mc_table: &'static [(char, char)] = &[
-        ('\u{903}', '\u{903}'), ('\u{93b}', '\u{93b}'), ('\u{93e}', '\u{940}'),
-        ('\u{949}', '\u{94c}'), ('\u{94e}', '\u{94f}'), ('\u{982}', '\u{983}'),
-        ('\u{9be}', '\u{9c0}'), ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9cc}'),
-        ('\u{9d7}', '\u{9d7}'), ('\u{a03}', '\u{a03}'), ('\u{a3e}', '\u{a40}'),
-        ('\u{a83}', '\u{a83}'), ('\u{abe}', '\u{ac0}'), ('\u{ac9}', '\u{ac9}'),
-        ('\u{acb}', '\u{acc}'), ('\u{b02}', '\u{b03}'), ('\u{b3e}', '\u{b3e}'),
-        ('\u{b40}', '\u{b40}'), ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4c}'),
-        ('\u{b57}', '\u{b57}'), ('\u{bbe}', '\u{bbf}'), ('\u{bc1}', '\u{bc2}'),
-        ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcc}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{c01}', '\u{c03}'), ('\u{c41}', '\u{c44}'), ('\u{c82}', '\u{c83}'),
-        ('\u{cbe}', '\u{cbe}'), ('\u{cc0}', '\u{cc4}'), ('\u{cc7}', '\u{cc8}'),
-        ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('\u{d02}', '\u{d03}'),
-        ('\u{d3e}', '\u{d40}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4c}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d82}', '\u{d83}'), ('\u{dcf}', '\u{dd1}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'), ('\u{f3e}', '\u{f3f}'),
-        ('\u{f7f}', '\u{f7f}'), ('\u{102b}', '\u{102c}'), ('\u{1031}',
-        '\u{1031}'), ('\u{1038}', '\u{1038}'), ('\u{103b}', '\u{103c}'),
-        ('\u{1056}', '\u{1057}'), ('\u{1062}', '\u{1064}'), ('\u{1067}',
-        '\u{106d}'), ('\u{1083}', '\u{1084}'), ('\u{1087}', '\u{108c}'),
-        ('\u{108f}', '\u{108f}'), ('\u{109a}', '\u{109c}'), ('\u{17b6}',
-        '\u{17b6}'), ('\u{17be}', '\u{17c5}'), ('\u{17c7}', '\u{17c8}'),
-        ('\u{1923}', '\u{1926}'), ('\u{1929}', '\u{192b}'), ('\u{1930}',
-        '\u{1931}'), ('\u{1933}', '\u{1938}'), ('\u{1a19}', '\u{1a1a}'),
-        ('\u{1a55}', '\u{1a55}'), ('\u{1a57}', '\u{1a57}'), ('\u{1a61}',
-        '\u{1a61}'), ('\u{1a63}', '\u{1a64}'), ('\u{1a6d}', '\u{1a72}'),
-        ('\u{1b04}', '\u{1b04}'), ('\u{1b35}', '\u{1b35}'), ('\u{1b3b}',
-        '\u{1b3b}'), ('\u{1b3d}', '\u{1b41}'), ('\u{1b43}', '\u{1b44}'),
-        ('\u{1b82}', '\u{1b82}'), ('\u{1ba1}', '\u{1ba1}'), ('\u{1ba6}',
-        '\u{1ba7}'), ('\u{1baa}', '\u{1baa}'), ('\u{1be7}', '\u{1be7}'),
-        ('\u{1bea}', '\u{1bec}'), ('\u{1bee}', '\u{1bee}'), ('\u{1bf2}',
-        '\u{1bf3}'), ('\u{1c24}', '\u{1c2b}'), ('\u{1c34}', '\u{1c35}'),
-        ('\u{1ce1}', '\u{1ce1}'), ('\u{1cf2}', '\u{1cf3}'), ('\u{1cf7}',
-        '\u{1cf7}'), ('\u{302e}', '\u{302f}'), ('\u{a823}', '\u{a824}'),
-        ('\u{a827}', '\u{a827}'), ('\u{a880}', '\u{a881}'), ('\u{a8b4}',
-        '\u{a8c3}'), ('\u{a952}', '\u{a953}'), ('\u{a983}', '\u{a983}'),
-        ('\u{a9b4}', '\u{a9b5}'), ('\u{a9ba}', '\u{a9bb}'), ('\u{a9bd}',
-        '\u{a9c0}'), ('\u{aa2f}', '\u{aa30}'), ('\u{aa33}', '\u{aa34}'),
-        ('\u{aa4d}', '\u{aa4d}'), ('\u{aa7b}', '\u{aa7b}'), ('\u{aa7d}',
-        '\u{aa7d}'), ('\u{aaeb}', '\u{aaeb}'), ('\u{aaee}', '\u{aaef}'),
-        ('\u{aaf5}', '\u{aaf5}'), ('\u{abe3}', '\u{abe4}'), ('\u{abe6}',
-        '\u{abe7}'), ('\u{abe9}', '\u{abea}'), ('\u{abec}', '\u{abec}'),
-        ('\u{11000}', '\u{11000}'), ('\u{11002}', '\u{11002}'), ('\u{11082}',
-        '\u{11082}'), ('\u{110b0}', '\u{110b2}'), ('\u{110b7}', '\u{110b8}'),
-        ('\u{1112c}', '\u{1112c}'), ('\u{11182}', '\u{11182}'), ('\u{111b3}',
-        '\u{111b5}'), ('\u{111bf}', '\u{111c0}'), ('\u{1122c}', '\u{1122e}'),
-        ('\u{11232}', '\u{11233}'), ('\u{11235}', '\u{11235}'), ('\u{112e0}',
-        '\u{112e2}'), ('\u{11302}', '\u{11303}'), ('\u{1133e}', '\u{1133f}'),
-        ('\u{11341}', '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}',
-        '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11362}', '\u{11363}'),
-        ('\u{11435}', '\u{11437}'), ('\u{11440}', '\u{11441}'), ('\u{11445}',
-        '\u{11445}'), ('\u{114b0}', '\u{114b2}'), ('\u{114b9}', '\u{114b9}'),
-        ('\u{114bb}', '\u{114be}'), ('\u{114c1}', '\u{114c1}'), ('\u{115af}',
-        '\u{115b1}'), ('\u{115b8}', '\u{115bb}'), ('\u{115be}', '\u{115be}'),
-        ('\u{11630}', '\u{11632}'), ('\u{1163b}', '\u{1163c}'), ('\u{1163e}',
-        '\u{1163e}'), ('\u{116ac}', '\u{116ac}'), ('\u{116ae}', '\u{116af}'),
-        ('\u{116b6}', '\u{116b6}'), ('\u{11720}', '\u{11721}'), ('\u{11726}',
-        '\u{11726}'), ('\u{11a07}', '\u{11a08}'), ('\u{11a39}', '\u{11a39}'),
-        ('\u{11a57}', '\u{11a58}'), ('\u{11a97}', '\u{11a97}'), ('\u{11c2f}',
-        '\u{11c2f}'), ('\u{11c3e}', '\u{11c3e}'), ('\u{11ca9}', '\u{11ca9}'),
-        ('\u{11cb1}', '\u{11cb1}'), ('\u{11cb4}', '\u{11cb4}'), ('\u{16f51}',
-        '\u{16f7e}'), ('\u{1d165}', '\u{1d166}'), ('\u{1d16d}', '\u{1d172}')
-    ];
-
-    pub const Me_table: &'static [(char, char)] = &[
-        ('\u{488}', '\u{489}'), ('\u{1abe}', '\u{1abe}'), ('\u{20dd}',
-        '\u{20e0}'), ('\u{20e2}', '\u{20e4}'), ('\u{a670}', '\u{a672}')
-    ];
-
-    pub const Mn_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{487}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8d4}', '\u{8e1}'),
-        ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'),
-        ('\u{941}', '\u{948}'), ('\u{94d}', '\u{94d}'), ('\u{951}', '\u{957}'),
-        ('\u{962}', '\u{963}'), ('\u{981}', '\u{981}'), ('\u{9bc}', '\u{9bc}'),
-        ('\u{9c1}', '\u{9c4}'), ('\u{9cd}', '\u{9cd}'), ('\u{9e2}', '\u{9e3}'),
-        ('\u{a01}', '\u{a02}'), ('\u{a3c}', '\u{a3c}'), ('\u{a41}', '\u{a42}'),
-        ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'),
-        ('\u{a70}', '\u{a71}'), ('\u{a75}', '\u{a75}'), ('\u{a81}', '\u{a82}'),
-        ('\u{abc}', '\u{abc}'), ('\u{ac1}', '\u{ac5}'), ('\u{ac7}', '\u{ac8}'),
-        ('\u{acd}', '\u{acd}'), ('\u{ae2}', '\u{ae3}'), ('\u{afa}', '\u{aff}'),
-        ('\u{b01}', '\u{b01}'), ('\u{b3c}', '\u{b3c}'), ('\u{b3f}', '\u{b3f}'),
-        ('\u{b41}', '\u{b44}'), ('\u{b4d}', '\u{b4d}'), ('\u{b56}', '\u{b56}'),
-        ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'), ('\u{bc0}', '\u{bc0}'),
-        ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'), ('\u{c3e}', '\u{c40}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'),
-        ('\u{cbf}', '\u{cbf}'), ('\u{cc6}', '\u{cc6}'), ('\u{ccc}', '\u{ccd}'),
-        ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), ('\u{d3b}', '\u{d3c}'),
-        ('\u{d41}', '\u{d44}'), ('\u{d4d}', '\u{d4d}'), ('\u{d62}', '\u{d63}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dd2}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'), ('\u{e47}', '\u{e4e}'),
-        ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'),
-        ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'),
-        ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('\u{f71}', '\u{f7e}'),
-        ('\u{f80}', '\u{f84}'), ('\u{f86}', '\u{f87}'), ('\u{f8d}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('\u{102d}',
-        '\u{1030}'), ('\u{1032}', '\u{1037}'), ('\u{1039}', '\u{103a}'),
-        ('\u{103d}', '\u{103e}'), ('\u{1058}', '\u{1059}'), ('\u{105e}',
-        '\u{1060}'), ('\u{1071}', '\u{1074}'), ('\u{1082}', '\u{1082}'),
-        ('\u{1085}', '\u{1086}'), ('\u{108d}', '\u{108d}'), ('\u{109d}',
-        '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'),
-        ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}',
-        '\u{1773}'), ('\u{17b4}', '\u{17b5}'), ('\u{17b7}', '\u{17bd}'),
-        ('\u{17c6}', '\u{17c6}'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}',
-        '\u{17dd}'), ('\u{180b}', '\u{180d}'), ('\u{1885}', '\u{1886}'),
-        ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), ('\u{1927}',
-        '\u{1928}'), ('\u{1932}', '\u{1932}'), ('\u{1939}', '\u{193b}'),
-        ('\u{1a17}', '\u{1a18}'), ('\u{1a1b}', '\u{1a1b}'), ('\u{1a56}',
-        '\u{1a56}'), ('\u{1a58}', '\u{1a5e}'), ('\u{1a60}', '\u{1a60}'),
-        ('\u{1a62}', '\u{1a62}'), ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}',
-        '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abd}'),
-        ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}',
-        '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), ('\u{1b42}', '\u{1b42}'),
-        ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}',
-        '\u{1ba5}'), ('\u{1ba8}', '\u{1ba9}'), ('\u{1bab}', '\u{1bad}'),
-        ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}',
-        '\u{1bed}'), ('\u{1bef}', '\u{1bf1}'), ('\u{1c2c}', '\u{1c33}'),
-        ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}',
-        '\u{1ce0}'), ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'),
-        ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), ('\u{1dc0}',
-        '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'), ('\u{20d0}', '\u{20dc}'),
-        ('\u{20e1}', '\u{20e1}'), ('\u{20e5}', '\u{20f0}'), ('\u{2cef}',
-        '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{302a}', '\u{302d}'), ('\u{3099}', '\u{309a}'), ('\u{a66f}',
-        '\u{a66f}'), ('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'),
-        ('\u{a6f0}', '\u{a6f1}'), ('\u{a802}', '\u{a802}'), ('\u{a806}',
-        '\u{a806}'), ('\u{a80b}', '\u{a80b}'), ('\u{a825}', '\u{a826}'),
-        ('\u{a8c4}', '\u{a8c5}'), ('\u{a8e0}', '\u{a8f1}'), ('\u{a926}',
-        '\u{a92d}'), ('\u{a947}', '\u{a951}'), ('\u{a980}', '\u{a982}'),
-        ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}',
-        '\u{a9bc}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'),
-        ('\u{aa31}', '\u{aa32}'), ('\u{aa35}', '\u{aa36}'), ('\u{aa43}',
-        '\u{aa43}'), ('\u{aa4c}', '\u{aa4c}'), ('\u{aa7c}', '\u{aa7c}'),
-        ('\u{aab0}', '\u{aab0}'), ('\u{aab2}', '\u{aab4}'), ('\u{aab7}',
-        '\u{aab8}'), ('\u{aabe}', '\u{aabf}'), ('\u{aac1}', '\u{aac1}'),
-        ('\u{aaec}', '\u{aaed}'), ('\u{aaf6}', '\u{aaf6}'), ('\u{abe5}',
-        '\u{abe5}'), ('\u{abe8}', '\u{abe8}'), ('\u{abed}', '\u{abed}'),
-        ('\u{fb1e}', '\u{fb1e}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}',
-        '\u{fe2f}'), ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'),
-        ('\u{10376}', '\u{1037a}'), ('\u{10a01}', '\u{10a03}'), ('\u{10a05}',
-        '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'),
-        ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{11001}',
-        '\u{11001}'), ('\u{11038}', '\u{11046}'), ('\u{1107f}', '\u{11081}'),
-        ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), ('\u{11100}',
-        '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'),
-        ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}',
-        '\u{111be}'), ('\u{111ca}', '\u{111cc}'), ('\u{1122f}', '\u{11231}'),
-        ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}',
-        '\u{1123e}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'),
-        ('\u{11300}', '\u{11301}'), ('\u{1133c}', '\u{1133c}'), ('\u{11340}',
-        '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
-        ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}',
-        '\u{11446}'), ('\u{114b3}', '\u{114b8}'), ('\u{114ba}', '\u{114ba}'),
-        ('\u{114bf}', '\u{114c0}'), ('\u{114c2}', '\u{114c3}'), ('\u{115b2}',
-        '\u{115b5}'), ('\u{115bc}', '\u{115bd}'), ('\u{115bf}', '\u{115c0}'),
-        ('\u{115dc}', '\u{115dd}'), ('\u{11633}', '\u{1163a}'), ('\u{1163d}',
-        '\u{1163d}'), ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'),
-        ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}',
-        '\u{116b7}'), ('\u{1171d}', '\u{1171f}'), ('\u{11722}', '\u{11725}'),
-        ('\u{11727}', '\u{1172b}'), ('\u{11a01}', '\u{11a06}'), ('\u{11a09}',
-        '\u{11a0a}'), ('\u{11a33}', '\u{11a38}'), ('\u{11a3b}', '\u{11a3e}'),
-        ('\u{11a47}', '\u{11a47}'), ('\u{11a51}', '\u{11a56}'), ('\u{11a59}',
-        '\u{11a5b}'), ('\u{11a8a}', '\u{11a96}'), ('\u{11a98}', '\u{11a99}'),
-        ('\u{11c30}', '\u{11c36}'), ('\u{11c38}', '\u{11c3d}'), ('\u{11c3f}',
-        '\u{11c3f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11caa}', '\u{11cb0}'),
-        ('\u{11cb2}', '\u{11cb3}'), ('\u{11cb5}', '\u{11cb6}'), ('\u{11d31}',
-        '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'),
-        ('\u{11d3f}', '\u{11d45}'), ('\u{11d47}', '\u{11d47}'), ('\u{16af0}',
-        '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f8f}', '\u{16f92}'),
-        ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}',
-        '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
-        ('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}',
-        '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'),
-        ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}',
-        '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
-        ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('\u{1e8d0}',
-        '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const N_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{660}', '\u{669}'), ('\u{6f0}', '\u{6f9}'),
-        ('\u{7c0}', '\u{7c9}'), ('\u{966}', '\u{96f}'), ('\u{9e6}', '\u{9ef}'),
-        ('\u{a66}', '\u{a6f}'), ('\u{ae6}', '\u{aef}'), ('\u{b66}', '\u{b6f}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c66}', '\u{c6f}'), ('\u{ce6}', '\u{cef}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{de6}', '\u{def}'), ('\u{e50}', '\u{e59}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{f20}', '\u{f29}'), ('\u{1040}',
-        '\u{1049}'), ('\u{1090}', '\u{1099}'), ('\u{16ee}', '\u{16f0}'),
-        ('\u{17e0}', '\u{17e9}'), ('\u{1810}', '\u{1819}'), ('\u{1946}',
-        '\u{194f}'), ('\u{19d0}', '\u{19d9}'), ('\u{1a80}', '\u{1a89}'),
-        ('\u{1a90}', '\u{1a99}'), ('\u{1b50}', '\u{1b59}'), ('\u{1bb0}',
-        '\u{1bb9}'), ('\u{1c40}', '\u{1c49}'), ('\u{1c50}', '\u{1c59}'),
-        ('\u{2160}', '\u{2182}'), ('\u{2185}', '\u{2188}'), ('\u{3007}',
-        '\u{3007}'), ('\u{3021}', '\u{3029}'), ('\u{3038}', '\u{303a}'),
-        ('\u{a620}', '\u{a629}'), ('\u{a6e6}', '\u{a6ef}'), ('\u{a8d0}',
-        '\u{a8d9}'), ('\u{a900}', '\u{a909}'), ('\u{a9d0}', '\u{a9d9}'),
-        ('\u{a9f0}', '\u{a9f9}'), ('\u{aa50}', '\u{aa59}'), ('\u{abf0}',
-        '\u{abf9}'), ('\u{ff10}', '\u{ff19}'), ('\u{10140}', '\u{10174}'),
-        ('\u{10341}', '\u{10341}'), ('\u{1034a}', '\u{1034a}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{104a0}', '\u{104a9}'), ('\u{11066}', '\u{1106f}'),
-        ('\u{110f0}', '\u{110f9}'), ('\u{11136}', '\u{1113f}'), ('\u{111d0}',
-        '\u{111d9}'), ('\u{112f0}', '\u{112f9}'), ('\u{11450}', '\u{11459}'),
-        ('\u{114d0}', '\u{114d9}'), ('\u{11650}', '\u{11659}'), ('\u{116c0}',
-        '\u{116c9}'), ('\u{11730}', '\u{11739}'), ('\u{118e0}', '\u{118e9}'),
-        ('\u{11c50}', '\u{11c59}'), ('\u{11d50}', '\u{11d59}'), ('\u{12400}',
-        '\u{1246e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16b50}', '\u{16b59}'),
-        ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1e950}', '\u{1e959}')
-    ];
-
-    pub const Nd_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{660}', '\u{669}'), ('\u{6f0}', '\u{6f9}'),
-        ('\u{7c0}', '\u{7c9}'), ('\u{966}', '\u{96f}'), ('\u{9e6}', '\u{9ef}'),
-        ('\u{a66}', '\u{a6f}'), ('\u{ae6}', '\u{aef}'), ('\u{b66}', '\u{b6f}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c66}', '\u{c6f}'), ('\u{ce6}', '\u{cef}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{de6}', '\u{def}'), ('\u{e50}', '\u{e59}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{f20}', '\u{f29}'), ('\u{1040}',
-        '\u{1049}'), ('\u{1090}', '\u{1099}'), ('\u{17e0}', '\u{17e9}'),
-        ('\u{1810}', '\u{1819}'), ('\u{1946}', '\u{194f}'), ('\u{19d0}',
-        '\u{19d9}'), ('\u{1a80}', '\u{1a89}'), ('\u{1a90}', '\u{1a99}'),
-        ('\u{1b50}', '\u{1b59}'), ('\u{1bb0}', '\u{1bb9}'), ('\u{1c40}',
-        '\u{1c49}'), ('\u{1c50}', '\u{1c59}'), ('\u{a620}', '\u{a629}'),
-        ('\u{a8d0}', '\u{a8d9}'), ('\u{a900}', '\u{a909}'), ('\u{a9d0}',
-        '\u{a9d9}'), ('\u{a9f0}', '\u{a9f9}'), ('\u{aa50}', '\u{aa59}'),
-        ('\u{abf0}', '\u{abf9}'), ('\u{ff10}', '\u{ff19}'), ('\u{104a0}',
-        '\u{104a9}'), ('\u{11066}', '\u{1106f}'), ('\u{110f0}', '\u{110f9}'),
-        ('\u{11136}', '\u{1113f}'), ('\u{111d0}', '\u{111d9}'), ('\u{112f0}',
-        '\u{112f9}'), ('\u{11450}', '\u{11459}'), ('\u{114d0}', '\u{114d9}'),
-        ('\u{11650}', '\u{11659}'), ('\u{116c0}', '\u{116c9}'), ('\u{11730}',
-        '\u{11739}'), ('\u{118e0}', '\u{118e9}'), ('\u{11c50}', '\u{11c59}'),
-        ('\u{11d50}', '\u{11d59}'), ('\u{16a60}', '\u{16a69}'), ('\u{16b50}',
-        '\u{16b59}'), ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1e950}', '\u{1e959}')
-    ];
-
-    pub const Nl_table: &'static [(char, char)] = &[
-        ('\u{16ee}', '\u{16f0}'), ('\u{2160}', '\u{2182}'), ('\u{2185}',
-        '\u{2188}'), ('\u{3007}', '\u{3007}'), ('\u{3021}', '\u{3029}'),
-        ('\u{3038}', '\u{303a}'), ('\u{a6e6}', '\u{a6ef}'), ('\u{10140}',
-        '\u{10174}'), ('\u{10341}', '\u{10341}'), ('\u{1034a}', '\u{1034a}'),
-        ('\u{103d1}', '\u{103d5}'), ('\u{12400}', '\u{1246e}')
-    ];
-
-    pub const No_table: &'static [(char, char)] = &[
-        ('\u{b2}', '\u{b3}'), ('\u{b9}', '\u{b9}'), ('\u{bc}', '\u{be}'),
-        ('\u{9f4}', '\u{9f9}'), ('\u{b72}', '\u{b77}'), ('\u{bf0}', '\u{bf2}'),
-        ('\u{c78}', '\u{c7e}'), ('\u{d58}', '\u{d5e}'), ('\u{d70}', '\u{d78}'),
-        ('\u{f2a}', '\u{f33}'), ('\u{1369}', '\u{137c}'), ('\u{17f0}',
-        '\u{17f9}'), ('\u{19da}', '\u{19da}'), ('\u{2070}', '\u{2070}'),
-        ('\u{2074}', '\u{2079}'), ('\u{2080}', '\u{2089}'), ('\u{2150}',
-        '\u{215f}'), ('\u{2189}', '\u{2189}'), ('\u{2460}', '\u{249b}'),
-        ('\u{24ea}', '\u{24ff}'), ('\u{2776}', '\u{2793}'), ('\u{2cfd}',
-        '\u{2cfd}'), ('\u{3192}', '\u{3195}'), ('\u{3220}', '\u{3229}'),
-        ('\u{3248}', '\u{324f}'), ('\u{3251}', '\u{325f}'), ('\u{3280}',
-        '\u{3289}'), ('\u{32b1}', '\u{32bf}'), ('\u{a830}', '\u{a835}'),
-        ('\u{10107}', '\u{10133}'), ('\u{10175}', '\u{10178}'), ('\u{1018a}',
-        '\u{1018b}'), ('\u{102e1}', '\u{102fb}'), ('\u{10320}', '\u{10323}'),
-        ('\u{10858}', '\u{1085f}'), ('\u{10879}', '\u{1087f}'), ('\u{108a7}',
-        '\u{108af}'), ('\u{108fb}', '\u{108ff}'), ('\u{10916}', '\u{1091b}'),
-        ('\u{109bc}', '\u{109bd}'), ('\u{109c0}', '\u{109cf}'), ('\u{109d2}',
-        '\u{109ff}'), ('\u{10a40}', '\u{10a47}'), ('\u{10a7d}', '\u{10a7e}'),
-        ('\u{10a9d}', '\u{10a9f}'), ('\u{10aeb}', '\u{10aef}'), ('\u{10b58}',
-        '\u{10b5f}'), ('\u{10b78}', '\u{10b7f}'), ('\u{10ba9}', '\u{10baf}'),
-        ('\u{10cfa}', '\u{10cff}'), ('\u{10e60}', '\u{10e7e}'), ('\u{11052}',
-        '\u{11065}'), ('\u{111e1}', '\u{111f4}'), ('\u{1173a}', '\u{1173b}'),
-        ('\u{118ea}', '\u{118f2}'), ('\u{11c5a}', '\u{11c6c}'), ('\u{16b5b}',
-        '\u{16b61}'), ('\u{1d360}', '\u{1d371}'), ('\u{1e8c7}', '\u{1e8cf}'),
-        ('\u{1f100}', '\u{1f10c}')
-    ];
-
-    pub const P_table: &'static [(char, char)] = &[
-        ('\u{21}', '\u{23}'), ('\u{25}', '\u{2a}'), ('\u{2c}', '\u{2f}'),
-        ('\u{3a}', '\u{3b}'), ('\u{3f}', '\u{40}'), ('\u{5b}', '\u{5d}'),
-        ('\u{5f}', '\u{5f}'), ('\u{7b}', '\u{7b}'), ('\u{7d}', '\u{7d}'),
-        ('\u{a1}', '\u{a1}'), ('\u{a7}', '\u{a7}'), ('\u{ab}', '\u{ab}'),
-        ('\u{b6}', '\u{b7}'), ('\u{bb}', '\u{bb}'), ('\u{bf}', '\u{bf}'),
-        ('\u{37e}', '\u{37e}'), ('\u{387}', '\u{387}'), ('\u{55a}', '\u{55f}'),
-        ('\u{589}', '\u{58a}'), ('\u{5be}', '\u{5be}'), ('\u{5c0}', '\u{5c0}'),
-        ('\u{5c3}', '\u{5c3}'), ('\u{5c6}', '\u{5c6}'), ('\u{5f3}', '\u{5f4}'),
-        ('\u{609}', '\u{60a}'), ('\u{60c}', '\u{60d}'), ('\u{61b}', '\u{61b}'),
-        ('\u{61e}', '\u{61f}'), ('\u{66a}', '\u{66d}'), ('\u{6d4}', '\u{6d4}'),
-        ('\u{700}', '\u{70d}'), ('\u{7f7}', '\u{7f9}'), ('\u{830}', '\u{83e}'),
-        ('\u{85e}', '\u{85e}'), ('\u{964}', '\u{965}'), ('\u{970}', '\u{970}'),
-        ('\u{9fd}', '\u{9fd}'), ('\u{af0}', '\u{af0}'), ('\u{df4}', '\u{df4}'),
-        ('\u{e4f}', '\u{e4f}'), ('\u{e5a}', '\u{e5b}'), ('\u{f04}', '\u{f12}'),
-        ('\u{f14}', '\u{f14}'), ('\u{f3a}', '\u{f3d}'), ('\u{f85}', '\u{f85}'),
-        ('\u{fd0}', '\u{fd4}'), ('\u{fd9}', '\u{fda}'), ('\u{104a}',
-        '\u{104f}'), ('\u{10fb}', '\u{10fb}'), ('\u{1360}', '\u{1368}'),
-        ('\u{1400}', '\u{1400}'), ('\u{166d}', '\u{166e}'), ('\u{169b}',
-        '\u{169c}'), ('\u{16eb}', '\u{16ed}'), ('\u{1735}', '\u{1736}'),
-        ('\u{17d4}', '\u{17d6}'), ('\u{17d8}', '\u{17da}'), ('\u{1800}',
-        '\u{180a}'), ('\u{1944}', '\u{1945}'), ('\u{1a1e}', '\u{1a1f}'),
-        ('\u{1aa0}', '\u{1aa6}'), ('\u{1aa8}', '\u{1aad}'), ('\u{1b5a}',
-        '\u{1b60}'), ('\u{1bfc}', '\u{1bff}'), ('\u{1c3b}', '\u{1c3f}'),
-        ('\u{1c7e}', '\u{1c7f}'), ('\u{1cc0}', '\u{1cc7}'), ('\u{1cd3}',
-        '\u{1cd3}'), ('\u{2010}', '\u{2027}'), ('\u{2030}', '\u{2043}'),
-        ('\u{2045}', '\u{2051}'), ('\u{2053}', '\u{205e}'), ('\u{207d}',
-        '\u{207e}'), ('\u{208d}', '\u{208e}'), ('\u{2308}', '\u{230b}'),
-        ('\u{2329}', '\u{232a}'), ('\u{2768}', '\u{2775}'), ('\u{27c5}',
-        '\u{27c6}'), ('\u{27e6}', '\u{27ef}'), ('\u{2983}', '\u{2998}'),
-        ('\u{29d8}', '\u{29db}'), ('\u{29fc}', '\u{29fd}'), ('\u{2cf9}',
-        '\u{2cfc}'), ('\u{2cfe}', '\u{2cff}'), ('\u{2d70}', '\u{2d70}'),
-        ('\u{2e00}', '\u{2e2e}'), ('\u{2e30}', '\u{2e49}'), ('\u{3001}',
-        '\u{3003}'), ('\u{3008}', '\u{3011}'), ('\u{3014}', '\u{301f}'),
-        ('\u{3030}', '\u{3030}'), ('\u{303d}', '\u{303d}'), ('\u{30a0}',
-        '\u{30a0}'), ('\u{30fb}', '\u{30fb}'), ('\u{a4fe}', '\u{a4ff}'),
-        ('\u{a60d}', '\u{a60f}'), ('\u{a673}', '\u{a673}'), ('\u{a67e}',
-        '\u{a67e}'), ('\u{a6f2}', '\u{a6f7}'), ('\u{a874}', '\u{a877}'),
-        ('\u{a8ce}', '\u{a8cf}'), ('\u{a8f8}', '\u{a8fa}'), ('\u{a8fc}',
-        '\u{a8fc}'), ('\u{a92e}', '\u{a92f}'), ('\u{a95f}', '\u{a95f}'),
-        ('\u{a9c1}', '\u{a9cd}'), ('\u{a9de}', '\u{a9df}'), ('\u{aa5c}',
-        '\u{aa5f}'), ('\u{aade}', '\u{aadf}'), ('\u{aaf0}', '\u{aaf1}'),
-        ('\u{abeb}', '\u{abeb}'), ('\u{fd3e}', '\u{fd3f}'), ('\u{fe10}',
-        '\u{fe19}'), ('\u{fe30}', '\u{fe52}'), ('\u{fe54}', '\u{fe61}'),
-        ('\u{fe63}', '\u{fe63}'), ('\u{fe68}', '\u{fe68}'), ('\u{fe6a}',
-        '\u{fe6b}'), ('\u{ff01}', '\u{ff03}'), ('\u{ff05}', '\u{ff0a}'),
-        ('\u{ff0c}', '\u{ff0f}'), ('\u{ff1a}', '\u{ff1b}'), ('\u{ff1f}',
-        '\u{ff20}'), ('\u{ff3b}', '\u{ff3d}'), ('\u{ff3f}', '\u{ff3f}'),
-        ('\u{ff5b}', '\u{ff5b}'), ('\u{ff5d}', '\u{ff5d}'), ('\u{ff5f}',
-        '\u{ff65}'), ('\u{10100}', '\u{10102}'), ('\u{1039f}', '\u{1039f}'),
-        ('\u{103d0}', '\u{103d0}'), ('\u{1056f}', '\u{1056f}'), ('\u{10857}',
-        '\u{10857}'), ('\u{1091f}', '\u{1091f}'), ('\u{1093f}', '\u{1093f}'),
-        ('\u{10a50}', '\u{10a58}'), ('\u{10a7f}', '\u{10a7f}'), ('\u{10af0}',
-        '\u{10af6}'), ('\u{10b39}', '\u{10b3f}'), ('\u{10b99}', '\u{10b9c}'),
-        ('\u{11047}', '\u{1104d}'), ('\u{110bb}', '\u{110bc}'), ('\u{110be}',
-        '\u{110c1}'), ('\u{11140}', '\u{11143}'), ('\u{11174}', '\u{11175}'),
-        ('\u{111c5}', '\u{111c9}'), ('\u{111cd}', '\u{111cd}'), ('\u{111db}',
-        '\u{111db}'), ('\u{111dd}', '\u{111df}'), ('\u{11238}', '\u{1123d}'),
-        ('\u{112a9}', '\u{112a9}'), ('\u{1144b}', '\u{1144f}'), ('\u{1145b}',
-        '\u{1145b}'), ('\u{1145d}', '\u{1145d}'), ('\u{114c6}', '\u{114c6}'),
-        ('\u{115c1}', '\u{115d7}'), ('\u{11641}', '\u{11643}'), ('\u{11660}',
-        '\u{1166c}'), ('\u{1173c}', '\u{1173e}'), ('\u{11a3f}', '\u{11a46}'),
-        ('\u{11a9a}', '\u{11a9c}'), ('\u{11a9e}', '\u{11aa2}'), ('\u{11c41}',
-        '\u{11c45}'), ('\u{11c70}', '\u{11c71}'), ('\u{12470}', '\u{12474}'),
-        ('\u{16a6e}', '\u{16a6f}'), ('\u{16af5}', '\u{16af5}'), ('\u{16b37}',
-        '\u{16b3b}'), ('\u{16b44}', '\u{16b44}'), ('\u{1bc9f}', '\u{1bc9f}'),
-        ('\u{1da87}', '\u{1da8b}'), ('\u{1e95e}', '\u{1e95f}')
-    ];
-
-    pub const Pc_table: &'static [(char, char)] = &[
-        ('\u{5f}', '\u{5f}'), ('\u{203f}', '\u{2040}'), ('\u{2054}',
-        '\u{2054}'), ('\u{fe33}', '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'),
-        ('\u{ff3f}', '\u{ff3f}')
-    ];
-
-    pub const Pd_table: &'static [(char, char)] = &[
-        ('\u{2d}', '\u{2d}'), ('\u{58a}', '\u{58a}'), ('\u{5be}', '\u{5be}'),
-        ('\u{1400}', '\u{1400}'), ('\u{1806}', '\u{1806}'), ('\u{2010}',
-        '\u{2015}'), ('\u{2e17}', '\u{2e17}'), ('\u{2e1a}', '\u{2e1a}'),
-        ('\u{2e3a}', '\u{2e3b}'), ('\u{2e40}', '\u{2e40}'), ('\u{301c}',
-        '\u{301c}'), ('\u{3030}', '\u{3030}'), ('\u{30a0}', '\u{30a0}'),
-        ('\u{fe31}', '\u{fe32}'), ('\u{fe58}', '\u{fe58}'), ('\u{fe63}',
-        '\u{fe63}'), ('\u{ff0d}', '\u{ff0d}')
-    ];
-
-    pub const Pe_table: &'static [(char, char)] = &[
-        ('\u{29}', '\u{29}'), ('\u{5d}', '\u{5d}'), ('\u{7d}', '\u{7d}'),
-        ('\u{f3b}', '\u{f3b}'), ('\u{f3d}', '\u{f3d}'), ('\u{169c}',
-        '\u{169c}'), ('\u{2046}', '\u{2046}'), ('\u{207e}', '\u{207e}'),
-        ('\u{208e}', '\u{208e}'), ('\u{2309}', '\u{2309}'), ('\u{230b}',
-        '\u{230b}'), ('\u{232a}', '\u{232a}'), ('\u{2769}', '\u{2769}'),
-        ('\u{276b}', '\u{276b}'), ('\u{276d}', '\u{276d}'), ('\u{276f}',
-        '\u{276f}'), ('\u{2771}', '\u{2771}'), ('\u{2773}', '\u{2773}'),
-        ('\u{2775}', '\u{2775}'), ('\u{27c6}', '\u{27c6}'), ('\u{27e7}',
-        '\u{27e7}'), ('\u{27e9}', '\u{27e9}'), ('\u{27eb}', '\u{27eb}'),
-        ('\u{27ed}', '\u{27ed}'), ('\u{27ef}', '\u{27ef}'), ('\u{2984}',
-        '\u{2984}'), ('\u{2986}', '\u{2986}'), ('\u{2988}', '\u{2988}'),
-        ('\u{298a}', '\u{298a}'), ('\u{298c}', '\u{298c}'), ('\u{298e}',
-        '\u{298e}'), ('\u{2990}', '\u{2990}'), ('\u{2992}', '\u{2992}'),
-        ('\u{2994}', '\u{2994}'), ('\u{2996}', '\u{2996}'), ('\u{2998}',
-        '\u{2998}'), ('\u{29d9}', '\u{29d9}'), ('\u{29db}', '\u{29db}'),
-        ('\u{29fd}', '\u{29fd}'), ('\u{2e23}', '\u{2e23}'), ('\u{2e25}',
-        '\u{2e25}'), ('\u{2e27}', '\u{2e27}'), ('\u{2e29}', '\u{2e29}'),
-        ('\u{3009}', '\u{3009}'), ('\u{300b}', '\u{300b}'), ('\u{300d}',
-        '\u{300d}'), ('\u{300f}', '\u{300f}'), ('\u{3011}', '\u{3011}'),
-        ('\u{3015}', '\u{3015}'), ('\u{3017}', '\u{3017}'), ('\u{3019}',
-        '\u{3019}'), ('\u{301b}', '\u{301b}'), ('\u{301e}', '\u{301f}'),
-        ('\u{fd3e}', '\u{fd3e}'), ('\u{fe18}', '\u{fe18}'), ('\u{fe36}',
-        '\u{fe36}'), ('\u{fe38}', '\u{fe38}'), ('\u{fe3a}', '\u{fe3a}'),
-        ('\u{fe3c}', '\u{fe3c}'), ('\u{fe3e}', '\u{fe3e}'), ('\u{fe40}',
-        '\u{fe40}'), ('\u{fe42}', '\u{fe42}'), ('\u{fe44}', '\u{fe44}'),
-        ('\u{fe48}', '\u{fe48}'), ('\u{fe5a}', '\u{fe5a}'), ('\u{fe5c}',
-        '\u{fe5c}'), ('\u{fe5e}', '\u{fe5e}'), ('\u{ff09}', '\u{ff09}'),
-        ('\u{ff3d}', '\u{ff3d}'), ('\u{ff5d}', '\u{ff5d}'), ('\u{ff60}',
-        '\u{ff60}'), ('\u{ff63}', '\u{ff63}')
-    ];
-
-    pub const Pf_table: &'static [(char, char)] = &[
-        ('\u{bb}', '\u{bb}'), ('\u{2019}', '\u{2019}'), ('\u{201d}',
-        '\u{201d}'), ('\u{203a}', '\u{203a}'), ('\u{2e03}', '\u{2e03}'),
-        ('\u{2e05}', '\u{2e05}'), ('\u{2e0a}', '\u{2e0a}'), ('\u{2e0d}',
-        '\u{2e0d}'), ('\u{2e1d}', '\u{2e1d}'), ('\u{2e21}', '\u{2e21}')
-    ];
-
-    pub const Pi_table: &'static [(char, char)] = &[
-        ('\u{ab}', '\u{ab}'), ('\u{2018}', '\u{2018}'), ('\u{201b}',
-        '\u{201c}'), ('\u{201f}', '\u{201f}'), ('\u{2039}', '\u{2039}'),
-        ('\u{2e02}', '\u{2e02}'), ('\u{2e04}', '\u{2e04}'), ('\u{2e09}',
-        '\u{2e09}'), ('\u{2e0c}', '\u{2e0c}'), ('\u{2e1c}', '\u{2e1c}'),
-        ('\u{2e20}', '\u{2e20}')
-    ];
-
-    pub const Po_table: &'static [(char, char)] = &[
-        ('\u{21}', '\u{23}'), ('\u{25}', '\u{27}'), ('\u{2a}', '\u{2a}'),
-        ('\u{2c}', '\u{2c}'), ('\u{2e}', '\u{2f}'), ('\u{3a}', '\u{3b}'),
-        ('\u{3f}', '\u{40}'), ('\u{5c}', '\u{5c}'), ('\u{a1}', '\u{a1}'),
-        ('\u{a7}', '\u{a7}'), ('\u{b6}', '\u{b7}'), ('\u{bf}', '\u{bf}'),
-        ('\u{37e}', '\u{37e}'), ('\u{387}', '\u{387}'), ('\u{55a}', '\u{55f}'),
-        ('\u{589}', '\u{589}'), ('\u{5c0}', '\u{5c0}'), ('\u{5c3}', '\u{5c3}'),
-        ('\u{5c6}', '\u{5c6}'), ('\u{5f3}', '\u{5f4}'), ('\u{609}', '\u{60a}'),
-        ('\u{60c}', '\u{60d}'), ('\u{61b}', '\u{61b}'), ('\u{61e}', '\u{61f}'),
-        ('\u{66a}', '\u{66d}'), ('\u{6d4}', '\u{6d4}'), ('\u{700}', '\u{70d}'),
-        ('\u{7f7}', '\u{7f9}'), ('\u{830}', '\u{83e}'), ('\u{85e}', '\u{85e}'),
-        ('\u{964}', '\u{965}'), ('\u{970}', '\u{970}'), ('\u{9fd}', '\u{9fd}'),
-        ('\u{af0}', '\u{af0}'), ('\u{df4}', '\u{df4}'), ('\u{e4f}', '\u{e4f}'),
-        ('\u{e5a}', '\u{e5b}'), ('\u{f04}', '\u{f12}'), ('\u{f14}', '\u{f14}'),
-        ('\u{f85}', '\u{f85}'), ('\u{fd0}', '\u{fd4}'), ('\u{fd9}', '\u{fda}'),
-        ('\u{104a}', '\u{104f}'), ('\u{10fb}', '\u{10fb}'), ('\u{1360}',
-        '\u{1368}'), ('\u{166d}', '\u{166e}'), ('\u{16eb}', '\u{16ed}'),
-        ('\u{1735}', '\u{1736}'), ('\u{17d4}', '\u{17d6}'), ('\u{17d8}',
-        '\u{17da}'), ('\u{1800}', '\u{1805}'), ('\u{1807}', '\u{180a}'),
-        ('\u{1944}', '\u{1945}'), ('\u{1a1e}', '\u{1a1f}'), ('\u{1aa0}',
-        '\u{1aa6}'), ('\u{1aa8}', '\u{1aad}'), ('\u{1b5a}', '\u{1b60}'),
-        ('\u{1bfc}', '\u{1bff}'), ('\u{1c3b}', '\u{1c3f}'), ('\u{1c7e}',
-        '\u{1c7f}'), ('\u{1cc0}', '\u{1cc7}'), ('\u{1cd3}', '\u{1cd3}'),
-        ('\u{2016}', '\u{2017}'), ('\u{2020}', '\u{2027}'), ('\u{2030}',
-        '\u{2038}'), ('\u{203b}', '\u{203e}'), ('\u{2041}', '\u{2043}'),
-        ('\u{2047}', '\u{2051}'), ('\u{2053}', '\u{2053}'), ('\u{2055}',
-        '\u{205e}'), ('\u{2cf9}', '\u{2cfc}'), ('\u{2cfe}', '\u{2cff}'),
-        ('\u{2d70}', '\u{2d70}'), ('\u{2e00}', '\u{2e01}'), ('\u{2e06}',
-        '\u{2e08}'), ('\u{2e0b}', '\u{2e0b}'), ('\u{2e0e}', '\u{2e16}'),
-        ('\u{2e18}', '\u{2e19}'), ('\u{2e1b}', '\u{2e1b}'), ('\u{2e1e}',
-        '\u{2e1f}'), ('\u{2e2a}', '\u{2e2e}'), ('\u{2e30}', '\u{2e39}'),
-        ('\u{2e3c}', '\u{2e3f}'), ('\u{2e41}', '\u{2e41}'), ('\u{2e43}',
-        '\u{2e49}'), ('\u{3001}', '\u{3003}'), ('\u{303d}', '\u{303d}'),
-        ('\u{30fb}', '\u{30fb}'), ('\u{a4fe}', '\u{a4ff}'), ('\u{a60d}',
-        '\u{a60f}'), ('\u{a673}', '\u{a673}'), ('\u{a67e}', '\u{a67e}'),
-        ('\u{a6f2}', '\u{a6f7}'), ('\u{a874}', '\u{a877}'), ('\u{a8ce}',
-        '\u{a8cf}'), ('\u{a8f8}', '\u{a8fa}'), ('\u{a8fc}', '\u{a8fc}'),
-        ('\u{a92e}', '\u{a92f}'), ('\u{a95f}', '\u{a95f}'), ('\u{a9c1}',
-        '\u{a9cd}'), ('\u{a9de}', '\u{a9df}'), ('\u{aa5c}', '\u{aa5f}'),
-        ('\u{aade}', '\u{aadf}'), ('\u{aaf0}', '\u{aaf1}'), ('\u{abeb}',
-        '\u{abeb}'), ('\u{fe10}', '\u{fe16}'), ('\u{fe19}', '\u{fe19}'),
-        ('\u{fe30}', '\u{fe30}'), ('\u{fe45}', '\u{fe46}'), ('\u{fe49}',
-        '\u{fe4c}'), ('\u{fe50}', '\u{fe52}'), ('\u{fe54}', '\u{fe57}'),
-        ('\u{fe5f}', '\u{fe61}'), ('\u{fe68}', '\u{fe68}'), ('\u{fe6a}',
-        '\u{fe6b}'), ('\u{ff01}', '\u{ff03}'), ('\u{ff05}', '\u{ff07}'),
-        ('\u{ff0a}', '\u{ff0a}'), ('\u{ff0c}', '\u{ff0c}'), ('\u{ff0e}',
-        '\u{ff0f}'), ('\u{ff1a}', '\u{ff1b}'), ('\u{ff1f}', '\u{ff20}'),
-        ('\u{ff3c}', '\u{ff3c}'), ('\u{ff61}', '\u{ff61}'), ('\u{ff64}',
-        '\u{ff65}'), ('\u{10100}', '\u{10102}'), ('\u{1039f}', '\u{1039f}'),
-        ('\u{103d0}', '\u{103d0}'), ('\u{1056f}', '\u{1056f}'), ('\u{10857}',
-        '\u{10857}'), ('\u{1091f}', '\u{1091f}'), ('\u{1093f}', '\u{1093f}'),
-        ('\u{10a50}', '\u{10a58}'), ('\u{10a7f}', '\u{10a7f}'), ('\u{10af0}',
-        '\u{10af6}'), ('\u{10b39}', '\u{10b3f}'), ('\u{10b99}', '\u{10b9c}'),
-        ('\u{11047}', '\u{1104d}'), ('\u{110bb}', '\u{110bc}'), ('\u{110be}',
-        '\u{110c1}'), ('\u{11140}', '\u{11143}'), ('\u{11174}', '\u{11175}'),
-        ('\u{111c5}', '\u{111c9}'), ('\u{111cd}', '\u{111cd}'), ('\u{111db}',
-        '\u{111db}'), ('\u{111dd}', '\u{111df}'), ('\u{11238}', '\u{1123d}'),
-        ('\u{112a9}', '\u{112a9}'), ('\u{1144b}', '\u{1144f}'), ('\u{1145b}',
-        '\u{1145b}'), ('\u{1145d}', '\u{1145d}'), ('\u{114c6}', '\u{114c6}'),
-        ('\u{115c1}', '\u{115d7}'), ('\u{11641}', '\u{11643}'), ('\u{11660}',
-        '\u{1166c}'), ('\u{1173c}', '\u{1173e}'), ('\u{11a3f}', '\u{11a46}'),
-        ('\u{11a9a}', '\u{11a9c}'), ('\u{11a9e}', '\u{11aa2}'), ('\u{11c41}',
-        '\u{11c45}'), ('\u{11c70}', '\u{11c71}'), ('\u{12470}', '\u{12474}'),
-        ('\u{16a6e}', '\u{16a6f}'), ('\u{16af5}', '\u{16af5}'), ('\u{16b37}',
-        '\u{16b3b}'), ('\u{16b44}', '\u{16b44}'), ('\u{1bc9f}', '\u{1bc9f}'),
-        ('\u{1da87}', '\u{1da8b}'), ('\u{1e95e}', '\u{1e95f}')
-    ];
-
-    pub const Ps_table: &'static [(char, char)] = &[
-        ('\u{28}', '\u{28}'), ('\u{5b}', '\u{5b}'), ('\u{7b}', '\u{7b}'),
-        ('\u{f3a}', '\u{f3a}'), ('\u{f3c}', '\u{f3c}'), ('\u{169b}',
-        '\u{169b}'), ('\u{201a}', '\u{201a}'), ('\u{201e}', '\u{201e}'),
-        ('\u{2045}', '\u{2045}'), ('\u{207d}', '\u{207d}'), ('\u{208d}',
-        '\u{208d}'), ('\u{2308}', '\u{2308}'), ('\u{230a}', '\u{230a}'),
-        ('\u{2329}', '\u{2329}'), ('\u{2768}', '\u{2768}'), ('\u{276a}',
-        '\u{276a}'), ('\u{276c}', '\u{276c}'), ('\u{276e}', '\u{276e}'),
-        ('\u{2770}', '\u{2770}'), ('\u{2772}', '\u{2772}'), ('\u{2774}',
-        '\u{2774}'), ('\u{27c5}', '\u{27c5}'), ('\u{27e6}', '\u{27e6}'),
-        ('\u{27e8}', '\u{27e8}'), ('\u{27ea}', '\u{27ea}'), ('\u{27ec}',
-        '\u{27ec}'), ('\u{27ee}', '\u{27ee}'), ('\u{2983}', '\u{2983}'),
-        ('\u{2985}', '\u{2985}'), ('\u{2987}', '\u{2987}'), ('\u{2989}',
-        '\u{2989}'), ('\u{298b}', '\u{298b}'), ('\u{298d}', '\u{298d}'),
-        ('\u{298f}', '\u{298f}'), ('\u{2991}', '\u{2991}'), ('\u{2993}',
-        '\u{2993}'), ('\u{2995}', '\u{2995}'), ('\u{2997}', '\u{2997}'),
-        ('\u{29d8}', '\u{29d8}'), ('\u{29da}', '\u{29da}'), ('\u{29fc}',
-        '\u{29fc}'), ('\u{2e22}', '\u{2e22}'), ('\u{2e24}', '\u{2e24}'),
-        ('\u{2e26}', '\u{2e26}'), ('\u{2e28}', '\u{2e28}'), ('\u{2e42}',
-        '\u{2e42}'), ('\u{3008}', '\u{3008}'), ('\u{300a}', '\u{300a}'),
-        ('\u{300c}', '\u{300c}'), ('\u{300e}', '\u{300e}'), ('\u{3010}',
-        '\u{3010}'), ('\u{3014}', '\u{3014}'), ('\u{3016}', '\u{3016}'),
-        ('\u{3018}', '\u{3018}'), ('\u{301a}', '\u{301a}'), ('\u{301d}',
-        '\u{301d}'), ('\u{fd3f}', '\u{fd3f}'), ('\u{fe17}', '\u{fe17}'),
-        ('\u{fe35}', '\u{fe35}'), ('\u{fe37}', '\u{fe37}'), ('\u{fe39}',
-        '\u{fe39}'), ('\u{fe3b}', '\u{fe3b}'), ('\u{fe3d}', '\u{fe3d}'),
-        ('\u{fe3f}', '\u{fe3f}'), ('\u{fe41}', '\u{fe41}'), ('\u{fe43}',
-        '\u{fe43}'), ('\u{fe47}', '\u{fe47}'), ('\u{fe59}', '\u{fe59}'),
-        ('\u{fe5b}', '\u{fe5b}'), ('\u{fe5d}', '\u{fe5d}'), ('\u{ff08}',
-        '\u{ff08}'), ('\u{ff3b}', '\u{ff3b}'), ('\u{ff5b}', '\u{ff5b}'),
-        ('\u{ff5f}', '\u{ff5f}'), ('\u{ff62}', '\u{ff62}')
-    ];
-
-    pub const S_table: &'static [(char, char)] = &[
-        ('\u{24}', '\u{24}'), ('\u{2b}', '\u{2b}'), ('\u{3c}', '\u{3e}'),
-        ('\u{5e}', '\u{5e}'), ('\u{60}', '\u{60}'), ('\u{7c}', '\u{7c}'),
-        ('\u{7e}', '\u{7e}'), ('\u{a2}', '\u{a6}'), ('\u{a8}', '\u{a9}'),
-        ('\u{ac}', '\u{ac}'), ('\u{ae}', '\u{b1}'), ('\u{b4}', '\u{b4}'),
-        ('\u{b8}', '\u{b8}'), ('\u{d7}', '\u{d7}'), ('\u{f7}', '\u{f7}'),
-        ('\u{2c2}', '\u{2c5}'), ('\u{2d2}', '\u{2df}'), ('\u{2e5}', '\u{2eb}'),
-        ('\u{2ed}', '\u{2ed}'), ('\u{2ef}', '\u{2ff}'), ('\u{375}', '\u{375}'),
-        ('\u{384}', '\u{385}'), ('\u{3f6}', '\u{3f6}'), ('\u{482}', '\u{482}'),
-        ('\u{58d}', '\u{58f}'), ('\u{606}', '\u{608}'), ('\u{60b}', '\u{60b}'),
-        ('\u{60e}', '\u{60f}'), ('\u{6de}', '\u{6de}'), ('\u{6e9}', '\u{6e9}'),
-        ('\u{6fd}', '\u{6fe}'), ('\u{7f6}', '\u{7f6}'), ('\u{9f2}', '\u{9f3}'),
-        ('\u{9fa}', '\u{9fb}'), ('\u{af1}', '\u{af1}'), ('\u{b70}', '\u{b70}'),
-        ('\u{bf3}', '\u{bfa}'), ('\u{c7f}', '\u{c7f}'), ('\u{d4f}', '\u{d4f}'),
-        ('\u{d79}', '\u{d79}'), ('\u{e3f}', '\u{e3f}'), ('\u{f01}', '\u{f03}'),
-        ('\u{f13}', '\u{f13}'), ('\u{f15}', '\u{f17}'), ('\u{f1a}', '\u{f1f}'),
-        ('\u{f34}', '\u{f34}'), ('\u{f36}', '\u{f36}'), ('\u{f38}', '\u{f38}'),
-        ('\u{fbe}', '\u{fc5}'), ('\u{fc7}', '\u{fcc}'), ('\u{fce}', '\u{fcf}'),
-        ('\u{fd5}', '\u{fd8}'), ('\u{109e}', '\u{109f}'), ('\u{1390}',
-        '\u{1399}'), ('\u{17db}', '\u{17db}'), ('\u{1940}', '\u{1940}'),
-        ('\u{19de}', '\u{19ff}'), ('\u{1b61}', '\u{1b6a}'), ('\u{1b74}',
-        '\u{1b7c}'), ('\u{1fbd}', '\u{1fbd}'), ('\u{1fbf}', '\u{1fc1}'),
-        ('\u{1fcd}', '\u{1fcf}'), ('\u{1fdd}', '\u{1fdf}'), ('\u{1fed}',
-        '\u{1fef}'), ('\u{1ffd}', '\u{1ffe}'), ('\u{2044}', '\u{2044}'),
-        ('\u{2052}', '\u{2052}'), ('\u{207a}', '\u{207c}'), ('\u{208a}',
-        '\u{208c}'), ('\u{20a0}', '\u{20bf}'), ('\u{2100}', '\u{2101}'),
-        ('\u{2103}', '\u{2106}'), ('\u{2108}', '\u{2109}'), ('\u{2114}',
-        '\u{2114}'), ('\u{2116}', '\u{2118}'), ('\u{211e}', '\u{2123}'),
-        ('\u{2125}', '\u{2125}'), ('\u{2127}', '\u{2127}'), ('\u{2129}',
-        '\u{2129}'), ('\u{212e}', '\u{212e}'), ('\u{213a}', '\u{213b}'),
-        ('\u{2140}', '\u{2144}'), ('\u{214a}', '\u{214d}'), ('\u{214f}',
-        '\u{214f}'), ('\u{218a}', '\u{218b}'), ('\u{2190}', '\u{2307}'),
-        ('\u{230c}', '\u{2328}'), ('\u{232b}', '\u{2426}'), ('\u{2440}',
-        '\u{244a}'), ('\u{249c}', '\u{24e9}'), ('\u{2500}', '\u{2767}'),
-        ('\u{2794}', '\u{27c4}'), ('\u{27c7}', '\u{27e5}'), ('\u{27f0}',
-        '\u{2982}'), ('\u{2999}', '\u{29d7}'), ('\u{29dc}', '\u{29fb}'),
-        ('\u{29fe}', '\u{2b73}'), ('\u{2b76}', '\u{2b95}'), ('\u{2b98}',
-        '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'), ('\u{2bca}', '\u{2bd2}'),
-        ('\u{2bec}', '\u{2bef}'), ('\u{2ce5}', '\u{2cea}'), ('\u{2e80}',
-        '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}', '\u{2fd5}'),
-        ('\u{2ff0}', '\u{2ffb}'), ('\u{3004}', '\u{3004}'), ('\u{3012}',
-        '\u{3013}'), ('\u{3020}', '\u{3020}'), ('\u{3036}', '\u{3037}'),
-        ('\u{303e}', '\u{303f}'), ('\u{309b}', '\u{309c}'), ('\u{3190}',
-        '\u{3191}'), ('\u{3196}', '\u{319f}'), ('\u{31c0}', '\u{31e3}'),
-        ('\u{3200}', '\u{321e}'), ('\u{322a}', '\u{3247}'), ('\u{3250}',
-        '\u{3250}'), ('\u{3260}', '\u{327f}'), ('\u{328a}', '\u{32b0}'),
-        ('\u{32c0}', '\u{32fe}'), ('\u{3300}', '\u{33ff}'), ('\u{4dc0}',
-        '\u{4dff}'), ('\u{a490}', '\u{a4c6}'), ('\u{a700}', '\u{a716}'),
-        ('\u{a720}', '\u{a721}'), ('\u{a789}', '\u{a78a}'), ('\u{a828}',
-        '\u{a82b}'), ('\u{a836}', '\u{a839}'), ('\u{aa77}', '\u{aa79}'),
-        ('\u{ab5b}', '\u{ab5b}'), ('\u{fb29}', '\u{fb29}'), ('\u{fbb2}',
-        '\u{fbc1}'), ('\u{fdfc}', '\u{fdfd}'), ('\u{fe62}', '\u{fe62}'),
-        ('\u{fe64}', '\u{fe66}'), ('\u{fe69}', '\u{fe69}'), ('\u{ff04}',
-        '\u{ff04}'), ('\u{ff0b}', '\u{ff0b}'), ('\u{ff1c}', '\u{ff1e}'),
-        ('\u{ff3e}', '\u{ff3e}'), ('\u{ff40}', '\u{ff40}'), ('\u{ff5c}',
-        '\u{ff5c}'), ('\u{ff5e}', '\u{ff5e}'), ('\u{ffe0}', '\u{ffe6}'),
-        ('\u{ffe8}', '\u{ffee}'), ('\u{fffc}', '\u{fffd}'), ('\u{10137}',
-        '\u{1013f}'), ('\u{10179}', '\u{10189}'), ('\u{1018c}', '\u{1018e}'),
-        ('\u{10190}', '\u{1019b}'), ('\u{101a0}', '\u{101a0}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{10877}', '\u{10878}'), ('\u{10ac8}', '\u{10ac8}'),
-        ('\u{1173f}', '\u{1173f}'), ('\u{16b3c}', '\u{16b3f}'), ('\u{16b45}',
-        '\u{16b45}'), ('\u{1bc9c}', '\u{1bc9c}'), ('\u{1d000}', '\u{1d0f5}'),
-        ('\u{1d100}', '\u{1d126}'), ('\u{1d129}', '\u{1d164}'), ('\u{1d16a}',
-        '\u{1d16c}'), ('\u{1d183}', '\u{1d184}'), ('\u{1d18c}', '\u{1d1a9}'),
-        ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d200}', '\u{1d241}'), ('\u{1d245}',
-        '\u{1d245}'), ('\u{1d300}', '\u{1d356}'), ('\u{1d6c1}', '\u{1d6c1}'),
-        ('\u{1d6db}', '\u{1d6db}'), ('\u{1d6fb}', '\u{1d6fb}'), ('\u{1d715}',
-        '\u{1d715}'), ('\u{1d735}', '\u{1d735}'), ('\u{1d74f}', '\u{1d74f}'),
-        ('\u{1d76f}', '\u{1d76f}'), ('\u{1d789}', '\u{1d789}'), ('\u{1d7a9}',
-        '\u{1d7a9}'), ('\u{1d7c3}', '\u{1d7c3}'), ('\u{1d800}', '\u{1d9ff}'),
-        ('\u{1da37}', '\u{1da3a}'), ('\u{1da6d}', '\u{1da74}'), ('\u{1da76}',
-        '\u{1da83}'), ('\u{1da85}', '\u{1da86}'), ('\u{1eef0}', '\u{1eef1}'),
-        ('\u{1f000}', '\u{1f02b}'), ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}',
-        '\u{1f0ae}'), ('\u{1f0b1}', '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'),
-        ('\u{1f0d1}', '\u{1f0f5}'), ('\u{1f110}', '\u{1f12e}'), ('\u{1f130}',
-        '\u{1f16b}'), ('\u{1f170}', '\u{1f1ac}'), ('\u{1f1e6}', '\u{1f202}'),
-        ('\u{1f210}', '\u{1f23b}'), ('\u{1f240}', '\u{1f248}'), ('\u{1f250}',
-        '\u{1f251}'), ('\u{1f260}', '\u{1f265}'), ('\u{1f300}', '\u{1f6d4}'),
-        ('\u{1f6e0}', '\u{1f6ec}'), ('\u{1f6f0}', '\u{1f6f8}'), ('\u{1f700}',
-        '\u{1f773}'), ('\u{1f780}', '\u{1f7d4}'), ('\u{1f800}', '\u{1f80b}'),
-        ('\u{1f810}', '\u{1f847}'), ('\u{1f850}', '\u{1f859}'), ('\u{1f860}',
-        '\u{1f887}'), ('\u{1f890}', '\u{1f8ad}'), ('\u{1f900}', '\u{1f90b}'),
-        ('\u{1f910}', '\u{1f93e}'), ('\u{1f940}', '\u{1f94c}'), ('\u{1f950}',
-        '\u{1f96b}'), ('\u{1f980}', '\u{1f997}'), ('\u{1f9c0}', '\u{1f9c0}'),
-        ('\u{1f9d0}', '\u{1f9e6}')
-    ];
-
-    pub const Sc_table: &'static [(char, char)] = &[
-        ('\u{24}', '\u{24}'), ('\u{a2}', '\u{a5}'), ('\u{58f}', '\u{58f}'),
-        ('\u{60b}', '\u{60b}'), ('\u{9f2}', '\u{9f3}'), ('\u{9fb}', '\u{9fb}'),
-        ('\u{af1}', '\u{af1}'), ('\u{bf9}', '\u{bf9}'), ('\u{e3f}', '\u{e3f}'),
-        ('\u{17db}', '\u{17db}'), ('\u{20a0}', '\u{20bf}'), ('\u{a838}',
-        '\u{a838}'), ('\u{fdfc}', '\u{fdfc}'), ('\u{fe69}', '\u{fe69}'),
-        ('\u{ff04}', '\u{ff04}'), ('\u{ffe0}', '\u{ffe1}'), ('\u{ffe5}',
-        '\u{ffe6}')
-    ];
-
-    pub const Sk_table: &'static [(char, char)] = &[
-        ('\u{5e}', '\u{5e}'), ('\u{60}', '\u{60}'), ('\u{a8}', '\u{a8}'),
-        ('\u{af}', '\u{af}'), ('\u{b4}', '\u{b4}'), ('\u{b8}', '\u{b8}'),
-        ('\u{2c2}', '\u{2c5}'), ('\u{2d2}', '\u{2df}'), ('\u{2e5}', '\u{2eb}'),
-        ('\u{2ed}', '\u{2ed}'), ('\u{2ef}', '\u{2ff}'), ('\u{375}', '\u{375}'),
-        ('\u{384}', '\u{385}'), ('\u{1fbd}', '\u{1fbd}'), ('\u{1fbf}',
-        '\u{1fc1}'), ('\u{1fcd}', '\u{1fcf}'), ('\u{1fdd}', '\u{1fdf}'),
-        ('\u{1fed}', '\u{1fef}'), ('\u{1ffd}', '\u{1ffe}'), ('\u{309b}',
-        '\u{309c}'), ('\u{a700}', '\u{a716}'), ('\u{a720}', '\u{a721}'),
-        ('\u{a789}', '\u{a78a}'), ('\u{ab5b}', '\u{ab5b}'), ('\u{fbb2}',
-        '\u{fbc1}'), ('\u{ff3e}', '\u{ff3e}'), ('\u{ff40}', '\u{ff40}'),
-        ('\u{ffe3}', '\u{ffe3}'), ('\u{1f3fb}', '\u{1f3ff}')
-    ];
-
-    pub const Sm_table: &'static [(char, char)] = &[
-        ('\u{2b}', '\u{2b}'), ('\u{3c}', '\u{3e}'), ('\u{7c}', '\u{7c}'),
-        ('\u{7e}', '\u{7e}'), ('\u{ac}', '\u{ac}'), ('\u{b1}', '\u{b1}'),
-        ('\u{d7}', '\u{d7}'), ('\u{f7}', '\u{f7}'), ('\u{3f6}', '\u{3f6}'),
-        ('\u{606}', '\u{608}'), ('\u{2044}', '\u{2044}'), ('\u{2052}',
-        '\u{2052}'), ('\u{207a}', '\u{207c}'), ('\u{208a}', '\u{208c}'),
-        ('\u{2118}', '\u{2118}'), ('\u{2140}', '\u{2144}'), ('\u{214b}',
-        '\u{214b}'), ('\u{2190}', '\u{2194}'), ('\u{219a}', '\u{219b}'),
-        ('\u{21a0}', '\u{21a0}'), ('\u{21a3}', '\u{21a3}'), ('\u{21a6}',
-        '\u{21a6}'), ('\u{21ae}', '\u{21ae}'), ('\u{21ce}', '\u{21cf}'),
-        ('\u{21d2}', '\u{21d2}'), ('\u{21d4}', '\u{21d4}'), ('\u{21f4}',
-        '\u{22ff}'), ('\u{2320}', '\u{2321}'), ('\u{237c}', '\u{237c}'),
-        ('\u{239b}', '\u{23b3}'), ('\u{23dc}', '\u{23e1}'), ('\u{25b7}',
-        '\u{25b7}'), ('\u{25c1}', '\u{25c1}'), ('\u{25f8}', '\u{25ff}'),
-        ('\u{266f}', '\u{266f}'), ('\u{27c0}', '\u{27c4}'), ('\u{27c7}',
-        '\u{27e5}'), ('\u{27f0}', '\u{27ff}'), ('\u{2900}', '\u{2982}'),
-        ('\u{2999}', '\u{29d7}'), ('\u{29dc}', '\u{29fb}'), ('\u{29fe}',
-        '\u{2aff}'), ('\u{2b30}', '\u{2b44}'), ('\u{2b47}', '\u{2b4c}'),
-        ('\u{fb29}', '\u{fb29}'), ('\u{fe62}', '\u{fe62}'), ('\u{fe64}',
-        '\u{fe66}'), ('\u{ff0b}', '\u{ff0b}'), ('\u{ff1c}', '\u{ff1e}'),
-        ('\u{ff5c}', '\u{ff5c}'), ('\u{ff5e}', '\u{ff5e}'), ('\u{ffe2}',
-        '\u{ffe2}'), ('\u{ffe9}', '\u{ffec}'), ('\u{1d6c1}', '\u{1d6c1}'),
-        ('\u{1d6db}', '\u{1d6db}'), ('\u{1d6fb}', '\u{1d6fb}'), ('\u{1d715}',
-        '\u{1d715}'), ('\u{1d735}', '\u{1d735}'), ('\u{1d74f}', '\u{1d74f}'),
-        ('\u{1d76f}', '\u{1d76f}'), ('\u{1d789}', '\u{1d789}'), ('\u{1d7a9}',
-        '\u{1d7a9}'), ('\u{1d7c3}', '\u{1d7c3}'), ('\u{1eef0}', '\u{1eef1}')
-    ];
-
-    pub const So_table: &'static [(char, char)] = &[
-        ('\u{a6}', '\u{a6}'), ('\u{a9}', '\u{a9}'), ('\u{ae}', '\u{ae}'),
-        ('\u{b0}', '\u{b0}'), ('\u{482}', '\u{482}'), ('\u{58d}', '\u{58e}'),
-        ('\u{60e}', '\u{60f}'), ('\u{6de}', '\u{6de}'), ('\u{6e9}', '\u{6e9}'),
-        ('\u{6fd}', '\u{6fe}'), ('\u{7f6}', '\u{7f6}'), ('\u{9fa}', '\u{9fa}'),
-        ('\u{b70}', '\u{b70}'), ('\u{bf3}', '\u{bf8}'), ('\u{bfa}', '\u{bfa}'),
-        ('\u{c7f}', '\u{c7f}'), ('\u{d4f}', '\u{d4f}'), ('\u{d79}', '\u{d79}'),
-        ('\u{f01}', '\u{f03}'), ('\u{f13}', '\u{f13}'), ('\u{f15}', '\u{f17}'),
-        ('\u{f1a}', '\u{f1f}'), ('\u{f34}', '\u{f34}'), ('\u{f36}', '\u{f36}'),
-        ('\u{f38}', '\u{f38}'), ('\u{fbe}', '\u{fc5}'), ('\u{fc7}', '\u{fcc}'),
-        ('\u{fce}', '\u{fcf}'), ('\u{fd5}', '\u{fd8}'), ('\u{109e}',
-        '\u{109f}'), ('\u{1390}', '\u{1399}'), ('\u{1940}', '\u{1940}'),
-        ('\u{19de}', '\u{19ff}'), ('\u{1b61}', '\u{1b6a}'), ('\u{1b74}',
-        '\u{1b7c}'), ('\u{2100}', '\u{2101}'), ('\u{2103}', '\u{2106}'),
-        ('\u{2108}', '\u{2109}'), ('\u{2114}', '\u{2114}'), ('\u{2116}',
-        '\u{2117}'), ('\u{211e}', '\u{2123}'), ('\u{2125}', '\u{2125}'),
-        ('\u{2127}', '\u{2127}'), ('\u{2129}', '\u{2129}'), ('\u{212e}',
-        '\u{212e}'), ('\u{213a}', '\u{213b}'), ('\u{214a}', '\u{214a}'),
-        ('\u{214c}', '\u{214d}'), ('\u{214f}', '\u{214f}'), ('\u{218a}',
-        '\u{218b}'), ('\u{2195}', '\u{2199}'), ('\u{219c}', '\u{219f}'),
-        ('\u{21a1}', '\u{21a2}'), ('\u{21a4}', '\u{21a5}'), ('\u{21a7}',
-        '\u{21ad}'), ('\u{21af}', '\u{21cd}'), ('\u{21d0}', '\u{21d1}'),
-        ('\u{21d3}', '\u{21d3}'), ('\u{21d5}', '\u{21f3}'), ('\u{2300}',
-        '\u{2307}'), ('\u{230c}', '\u{231f}'), ('\u{2322}', '\u{2328}'),
-        ('\u{232b}', '\u{237b}'), ('\u{237d}', '\u{239a}'), ('\u{23b4}',
-        '\u{23db}'), ('\u{23e2}', '\u{2426}'), ('\u{2440}', '\u{244a}'),
-        ('\u{249c}', '\u{24e9}'), ('\u{2500}', '\u{25b6}'), ('\u{25b8}',
-        '\u{25c0}'), ('\u{25c2}', '\u{25f7}'), ('\u{2600}', '\u{266e}'),
-        ('\u{2670}', '\u{2767}'), ('\u{2794}', '\u{27bf}'), ('\u{2800}',
-        '\u{28ff}'), ('\u{2b00}', '\u{2b2f}'), ('\u{2b45}', '\u{2b46}'),
-        ('\u{2b4d}', '\u{2b73}'), ('\u{2b76}', '\u{2b95}'), ('\u{2b98}',
-        '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'), ('\u{2bca}', '\u{2bd2}'),
-        ('\u{2bec}', '\u{2bef}'), ('\u{2ce5}', '\u{2cea}'), ('\u{2e80}',
-        '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}', '\u{2fd5}'),
-        ('\u{2ff0}', '\u{2ffb}'), ('\u{3004}', '\u{3004}'), ('\u{3012}',
-        '\u{3013}'), ('\u{3020}', '\u{3020}'), ('\u{3036}', '\u{3037}'),
-        ('\u{303e}', '\u{303f}'), ('\u{3190}', '\u{3191}'), ('\u{3196}',
-        '\u{319f}'), ('\u{31c0}', '\u{31e3}'), ('\u{3200}', '\u{321e}'),
-        ('\u{322a}', '\u{3247}'), ('\u{3250}', '\u{3250}'), ('\u{3260}',
-        '\u{327f}'), ('\u{328a}', '\u{32b0}'), ('\u{32c0}', '\u{32fe}'),
-        ('\u{3300}', '\u{33ff}'), ('\u{4dc0}', '\u{4dff}'), ('\u{a490}',
-        '\u{a4c6}'), ('\u{a828}', '\u{a82b}'), ('\u{a836}', '\u{a837}'),
-        ('\u{a839}', '\u{a839}'), ('\u{aa77}', '\u{aa79}'), ('\u{fdfd}',
-        '\u{fdfd}'), ('\u{ffe4}', '\u{ffe4}'), ('\u{ffe8}', '\u{ffe8}'),
-        ('\u{ffed}', '\u{ffee}'), ('\u{fffc}', '\u{fffd}'), ('\u{10137}',
-        '\u{1013f}'), ('\u{10179}', '\u{10189}'), ('\u{1018c}', '\u{1018e}'),
-        ('\u{10190}', '\u{1019b}'), ('\u{101a0}', '\u{101a0}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{10877}', '\u{10878}'), ('\u{10ac8}', '\u{10ac8}'),
-        ('\u{1173f}', '\u{1173f}'), ('\u{16b3c}', '\u{16b3f}'), ('\u{16b45}',
-        '\u{16b45}'), ('\u{1bc9c}', '\u{1bc9c}'), ('\u{1d000}', '\u{1d0f5}'),
-        ('\u{1d100}', '\u{1d126}'), ('\u{1d129}', '\u{1d164}'), ('\u{1d16a}',
-        '\u{1d16c}'), ('\u{1d183}', '\u{1d184}'), ('\u{1d18c}', '\u{1d1a9}'),
-        ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d200}', '\u{1d241}'), ('\u{1d245}',
-        '\u{1d245}'), ('\u{1d300}', '\u{1d356}'), ('\u{1d800}', '\u{1d9ff}'),
-        ('\u{1da37}', '\u{1da3a}'), ('\u{1da6d}', '\u{1da74}'), ('\u{1da76}',
-        '\u{1da83}'), ('\u{1da85}', '\u{1da86}'), ('\u{1f000}', '\u{1f02b}'),
-        ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}', '\u{1f0ae}'), ('\u{1f0b1}',
-        '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'), ('\u{1f0d1}', '\u{1f0f5}'),
-        ('\u{1f110}', '\u{1f12e}'), ('\u{1f130}', '\u{1f16b}'), ('\u{1f170}',
-        '\u{1f1ac}'), ('\u{1f1e6}', '\u{1f202}'), ('\u{1f210}', '\u{1f23b}'),
-        ('\u{1f240}', '\u{1f248}'), ('\u{1f250}', '\u{1f251}'), ('\u{1f260}',
-        '\u{1f265}'), ('\u{1f300}', '\u{1f3fa}'), ('\u{1f400}', '\u{1f6d4}'),
-        ('\u{1f6e0}', '\u{1f6ec}'), ('\u{1f6f0}', '\u{1f6f8}'), ('\u{1f700}',
-        '\u{1f773}'), ('\u{1f780}', '\u{1f7d4}'), ('\u{1f800}', '\u{1f80b}'),
-        ('\u{1f810}', '\u{1f847}'), ('\u{1f850}', '\u{1f859}'), ('\u{1f860}',
-        '\u{1f887}'), ('\u{1f890}', '\u{1f8ad}'), ('\u{1f900}', '\u{1f90b}'),
-        ('\u{1f910}', '\u{1f93e}'), ('\u{1f940}', '\u{1f94c}'), ('\u{1f950}',
-        '\u{1f96b}'), ('\u{1f980}', '\u{1f997}'), ('\u{1f9c0}', '\u{1f9c0}'),
-        ('\u{1f9d0}', '\u{1f9e6}')
-    ];
-
-    pub const Z_table: &'static [(char, char)] = &[
-        ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
-        ('\u{2000}', '\u{200a}'), ('\u{2028}', '\u{2029}'), ('\u{202f}',
-        '\u{202f}'), ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
-
-    pub const Zl_table: &'static [(char, char)] = &[
-        ('\u{2028}', '\u{2028}')
-    ];
-
-    pub const Zp_table: &'static [(char, char)] = &[
-        ('\u{2029}', '\u{2029}')
-    ];
-
-    pub const Zs_table: &'static [(char, char)] = &[
-        ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
-        ('\u{2000}', '\u{200a}'), ('\u{202f}', '\u{202f}'), ('\u{205f}',
-        '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
-
+use std::cmp::Ordering;
+use std::result;
+
+use ucd_util::{self, PropertyValues};
+
+use hir;
+use unicode_tables::age;
+use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+use unicode_tables::general_category;
+use unicode_tables::property_bool;
+use unicode_tables::property_names::PROPERTY_NAMES;
+use unicode_tables::property_values::PROPERTY_VALUES;
+use unicode_tables::script;
+use unicode_tables::script_extension;
+
+type Result<T> = result::Result<T, Error>;
+
+/// An error that occurs when dealing with Unicode.
+///
+/// We don't impl the Error trait here because these always get converted
+/// into other public errors. (This error type isn't exported.)
+#[derive(Debug)]
+pub enum Error {
+    PropertyNotFound,
+    PropertyValueNotFound,
 }
 
-pub mod derived_property {
-    pub const Alphabetic_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{345}', '\u{345}'), ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'),
-        ('\u{37a}', '\u{37d}'), ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'),
-        ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'),
-        ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'),
-        ('\u{5b0}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'),
-        ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{610}', '\u{61a}'), ('\u{620}', '\u{657}'),
-        ('\u{659}', '\u{65f}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'),
-        ('\u{6e1}', '\u{6e8}'), ('\u{6ed}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'),
-        ('\u{6ff}', '\u{6ff}'), ('\u{710}', '\u{73f}'), ('\u{74d}', '\u{7b1}'),
-        ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'),
-        ('\u{800}', '\u{817}'), ('\u{81a}', '\u{82c}'), ('\u{840}', '\u{858}'),
-        ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'), ('\u{8b6}', '\u{8bd}'),
-        ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', '\u{93b}'),
-        ('\u{93d}', '\u{94c}'), ('\u{94e}', '\u{950}'), ('\u{955}', '\u{963}'),
-        ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9cc}'), ('\u{9ce}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'),
-        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e3}'), ('\u{9f0}', '\u{9f1}'),
-        ('\u{9fc}', '\u{9fc}'), ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4c}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a70}', '\u{a75}'), ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'),
-        ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'),
-        ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'), ('\u{abd}', '\u{ac5}'),
-        ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acc}'), ('\u{ad0}', '\u{ad0}'),
-        ('\u{ae0}', '\u{ae3}'), ('\u{af9}', '\u{afc}'), ('\u{b01}', '\u{b03}'),
-        ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'),
-        ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'),
-        ('\u{b3d}', '\u{b44}'), ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4c}'),
-        ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b63}'),
-        ('\u{b71}', '\u{b71}'), ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'),
-        ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'),
-        ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'),
-        ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'),
-        ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcc}'), ('\u{bd0}', '\u{bd0}'),
-        ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'),
-        ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'),
-        ('\u{c3d}', '\u{c44}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4c}'),
-        ('\u{c55}', '\u{c56}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'),
-        ('\u{c80}', '\u{c83}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
-        ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'),
-        ('\u{cbd}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccc}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'),
-        ('\u{cf1}', '\u{cf2}'), ('\u{d00}', '\u{d03}'), ('\u{d05}', '\u{d0c}'),
-        ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d44}'),
-        ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4c}'), ('\u{d4e}', '\u{d4e}'),
-        ('\u{d54}', '\u{d57}'), ('\u{d5f}', '\u{d63}'), ('\u{d7a}', '\u{d7f}'),
-        ('\u{d82}', '\u{d83}'), ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'),
-        ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'),
-        ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'),
-        ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e46}'),
-        ('\u{e4d}', '\u{e4d}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
-        ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'),
-        ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'),
-        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'),
-        ('\u{ead}', '\u{eb9}'), ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'),
-        ('\u{ec6}', '\u{ec6}'), ('\u{ecd}', '\u{ecd}'), ('\u{edc}', '\u{edf}'),
-        ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f71}', '\u{f81}'), ('\u{f88}', '\u{f97}'), ('\u{f99}', '\u{fbc}'),
-        ('\u{1000}', '\u{1036}'), ('\u{1038}', '\u{1038}'), ('\u{103b}',
-        '\u{103f}'), ('\u{1050}', '\u{1062}'), ('\u{1065}', '\u{1068}'),
-        ('\u{106e}', '\u{1086}'), ('\u{108e}', '\u{108e}'), ('\u{109c}',
-        '\u{109d}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'),
-        ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}',
-        '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'),
-        ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}',
-        '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'),
-        ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}',
-        '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'),
-        ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}',
-        '\u{135a}'), ('\u{135f}', '\u{135f}'), ('\u{1380}', '\u{138f}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}',
-        '\u{166c}'), ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'),
-        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}',
-        '\u{170c}'), ('\u{170e}', '\u{1713}'), ('\u{1720}', '\u{1733}'),
-        ('\u{1740}', '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}',
-        '\u{1770}'), ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17b3}'),
-        ('\u{17b6}', '\u{17c8}'), ('\u{17d7}', '\u{17d7}'), ('\u{17dc}',
-        '\u{17dc}'), ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18aa}'),
-        ('\u{18b0}', '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}',
-        '\u{192b}'), ('\u{1930}', '\u{1938}'), ('\u{1950}', '\u{196d}'),
-        ('\u{1970}', '\u{1974}'), ('\u{1980}', '\u{19ab}'), ('\u{19b0}',
-        '\u{19c9}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'),
-        ('\u{1a61}', '\u{1a74}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1b00}',
-        '\u{1b33}'), ('\u{1b35}', '\u{1b43}'), ('\u{1b45}', '\u{1b4b}'),
-        ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', '\u{1baf}'), ('\u{1bba}',
-        '\u{1be5}'), ('\u{1be7}', '\u{1bf1}'), ('\u{1c00}', '\u{1c35}'),
-        ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}', '\u{1c7d}'), ('\u{1c80}',
-        '\u{1c88}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf3}'),
-        ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1de7}',
-        '\u{1df4}'), ('\u{1e00}', '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'),
-        ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}',
-        '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'),
-        ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}',
-        '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'),
-        ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}',
-        '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'),
-        ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'), ('\u{2071}',
-        '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}',
-        '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2119}', '\u{211d}'),
-        ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}',
-        '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}', '\u{2139}'),
-        ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{24b6}', '\u{24e9}'),
-        ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}',
-        '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'),
-        ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}',
-        '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'),
-        ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}',
-        '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}', '\u{3007}'), ('\u{3021}',
-        '\u{3029}'), ('\u{3031}', '\u{3035}'), ('\u{3038}', '\u{303c}'),
-        ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312e}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fea}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'),
-        ('\u{a640}', '\u{a66e}'), ('\u{a674}', '\u{a67b}'), ('\u{a67f}',
-        '\u{a6ef}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'),
-        ('\u{a78b}', '\u{a7ae}'), ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}',
-        '\u{a801}'), ('\u{a803}', '\u{a805}'), ('\u{a807}', '\u{a80a}'),
-        ('\u{a80c}', '\u{a827}'), ('\u{a840}', '\u{a873}'), ('\u{a880}',
-        '\u{a8c3}'), ('\u{a8c5}', '\u{a8c5}'), ('\u{a8f2}', '\u{a8f7}'),
-        ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}',
-        '\u{a92a}'), ('\u{a930}', '\u{a952}'), ('\u{a960}', '\u{a97c}'),
-        ('\u{a980}', '\u{a9b2}'), ('\u{a9b4}', '\u{a9bf}'), ('\u{a9cf}',
-        '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}', '\u{a9ef}'),
-        ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'), ('\u{aa40}',
-        '\u{aa4d}'), ('\u{aa60}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'),
-        ('\u{aa7e}', '\u{aabe}'), ('\u{aac0}', '\u{aac0}'), ('\u{aac2}',
-        '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', '\u{aaef}'),
-        ('\u{aaf2}', '\u{aaf5}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}',
-        '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'),
-        ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}',
-        '\u{ab65}'), ('\u{ab70}', '\u{abea}'), ('\u{ac00}', '\u{d7a3}'),
-        ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}',
-        '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}', '\u{fb06}'),
-        ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'), ('\u{fb2a}',
-        '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'),
-        ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}',
-        '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'),
-        ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
-        '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'),
-        ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{10300}', '\u{1031f}'), ('\u{1032d}',
-        '\u{1034a}'), ('\u{10350}', '\u{1037a}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{104b0}', '\u{104d3}'),
-        ('\u{104d8}', '\u{104fb}'), ('\u{10500}', '\u{10527}'), ('\u{10530}',
-        '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'),
-        ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}',
-        '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'),
-        ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}',
-        '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'),
-        ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}',
-        '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'),
-        ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
-        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'),
-        ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}',
-        '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'),
-        ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}',
-        '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'),
-        ('\u{10cc0}', '\u{10cf2}'), ('\u{11000}', '\u{11045}'), ('\u{11082}',
-        '\u{110b8}'), ('\u{110d0}', '\u{110e8}'), ('\u{11100}', '\u{11132}'),
-        ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'), ('\u{11180}',
-        '\u{111bf}'), ('\u{111c1}', '\u{111c4}'), ('\u{111da}', '\u{111da}'),
-        ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}',
-        '\u{11234}'), ('\u{11237}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'),
-        ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}',
-        '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'),
-        ('\u{112b0}', '\u{112e8}'), ('\u{11300}', '\u{11303}'), ('\u{11305}',
-        '\u{1130c}'), ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'),
-        ('\u{1132a}', '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}',
-        '\u{11339}'), ('\u{1133d}', '\u{11344}'), ('\u{11347}', '\u{11348}'),
-        ('\u{1134b}', '\u{1134c}'), ('\u{11350}', '\u{11350}'), ('\u{11357}',
-        '\u{11357}'), ('\u{1135d}', '\u{11363}'), ('\u{11400}', '\u{11441}'),
-        ('\u{11443}', '\u{11445}'), ('\u{11447}', '\u{1144a}'), ('\u{11480}',
-        '\u{114c1}'), ('\u{114c4}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'),
-        ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115be}'), ('\u{115d8}',
-        '\u{115dd}'), ('\u{11600}', '\u{1163e}'), ('\u{11640}', '\u{11640}'),
-        ('\u{11644}', '\u{11644}'), ('\u{11680}', '\u{116b5}'), ('\u{11700}',
-        '\u{11719}'), ('\u{1171d}', '\u{1172a}'), ('\u{118a0}', '\u{118df}'),
-        ('\u{118ff}', '\u{118ff}'), ('\u{11a00}', '\u{11a32}'), ('\u{11a35}',
-        '\u{11a3e}'), ('\u{11a50}', '\u{11a83}'), ('\u{11a86}', '\u{11a97}'),
-        ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', '\u{11c08}'), ('\u{11c0a}',
-        '\u{11c36}'), ('\u{11c38}', '\u{11c3e}'), ('\u{11c40}', '\u{11c40}'),
-        ('\u{11c72}', '\u{11c8f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}',
-        '\u{11cb6}'), ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'),
-        ('\u{11d0b}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}',
-        '\u{11d3d}'), ('\u{11d3f}', '\u{11d41}'), ('\u{11d43}', '\u{11d43}'),
-        ('\u{11d46}', '\u{11d47}'), ('\u{12000}', '\u{12399}'), ('\u{12400}',
-        '\u{1246e}'), ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'),
-        ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}',
-        '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}', '\u{16b36}'),
-        ('\u{16b40}', '\u{16b43}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}',
-        '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'),
-        ('\u{16f93}', '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'), ('\u{17000}',
-        '\u{187ec}'), ('\u{18800}', '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'),
-        ('\u{1b170}', '\u{1b2fb}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}',
-        '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'),
-        ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}',
-        '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'),
-        ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}',
-        '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'),
-        ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}',
-        '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}', '\u{1d539}'),
-        ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}',
-        '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'),
-        ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}',
-        '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}', '\u{1d734}'),
-        ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'), ('\u{1d770}',
-        '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}', '\u{1d7c2}'),
-        ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}',
-        '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'),
-        ('\u{1e026}', '\u{1e02a}'), ('\u{1e800}', '\u{1e8c4}'), ('\u{1e900}',
-        '\u{1e943}'), ('\u{1e947}', '\u{1e947}'), ('\u{1ee00}', '\u{1ee03}'),
-        ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}',
-        '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'),
-        ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}',
-        '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'),
-        ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}',
-        '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'),
-        ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}',
-        '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'),
-        ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}',
-        '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'),
-        ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}',
-        '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'),
-        ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{1f130}',
-        '\u{1f149}'), ('\u{1f150}', '\u{1f169}'), ('\u{1f170}', '\u{1f189}'),
-        ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'), ('\u{2b740}',
-        '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2ceb0}', '\u{2ebe0}'),
-        ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Default_Ignorable_Code_Point_table: &'static [(char, char)] = &[
-        ('\u{ad}', '\u{ad}'), ('\u{34f}', '\u{34f}'), ('\u{61c}', '\u{61c}'),
-        ('\u{115f}', '\u{1160}'), ('\u{17b4}', '\u{17b5}'), ('\u{180b}',
-        '\u{180e}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
-        ('\u{2060}', '\u{206f}'), ('\u{3164}', '\u{3164}'), ('\u{fe00}',
-        '\u{fe0f}'), ('\u{feff}', '\u{feff}'), ('\u{ffa0}', '\u{ffa0}'),
-        ('\u{fff0}', '\u{fff8}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}',
-        '\u{1d17a}'), ('\u{e0000}', '\u{e0fff}')
-    ];
-
-    pub const Grapheme_Extend_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8d4}', '\u{8e1}'),
-        ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'),
-        ('\u{941}', '\u{948}'), ('\u{94d}', '\u{94d}'), ('\u{951}', '\u{957}'),
-        ('\u{962}', '\u{963}'), ('\u{981}', '\u{981}'), ('\u{9bc}', '\u{9bc}'),
-        ('\u{9be}', '\u{9be}'), ('\u{9c1}', '\u{9c4}'), ('\u{9cd}', '\u{9cd}'),
-        ('\u{9d7}', '\u{9d7}'), ('\u{9e2}', '\u{9e3}'), ('\u{a01}', '\u{a02}'),
-        ('\u{a3c}', '\u{a3c}'), ('\u{a41}', '\u{a42}'), ('\u{a47}', '\u{a48}'),
-        ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'),
-        ('\u{a75}', '\u{a75}'), ('\u{a81}', '\u{a82}'), ('\u{abc}', '\u{abc}'),
-        ('\u{ac1}', '\u{ac5}'), ('\u{ac7}', '\u{ac8}'), ('\u{acd}', '\u{acd}'),
-        ('\u{ae2}', '\u{ae3}'), ('\u{afa}', '\u{aff}'), ('\u{b01}', '\u{b01}'),
-        ('\u{b3c}', '\u{b3c}'), ('\u{b3e}', '\u{b3f}'), ('\u{b41}', '\u{b44}'),
-        ('\u{b4d}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b62}', '\u{b63}'),
-        ('\u{b82}', '\u{b82}'), ('\u{bbe}', '\u{bbe}'), ('\u{bc0}', '\u{bc0}'),
-        ('\u{bcd}', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'),
-        ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
-        ('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'),
-        ('\u{cbc}', '\u{cbc}'), ('\u{cbf}', '\u{cbf}'), ('\u{cc2}', '\u{cc2}'),
-        ('\u{cc6}', '\u{cc6}'), ('\u{ccc}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'),
-        ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), ('\u{d3b}', '\u{d3c}'),
-        ('\u{d3e}', '\u{d3e}'), ('\u{d41}', '\u{d44}'), ('\u{d4d}', '\u{d4d}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d62}', '\u{d63}'), ('\u{dca}', '\u{dca}'),
-        ('\u{dcf}', '\u{dcf}'), ('\u{dd2}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{ddf}', '\u{ddf}'), ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'),
-        ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'),
-        ('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'),
-        ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'),
-        ('\u{f71}', '\u{f7e}'), ('\u{f80}', '\u{f84}'), ('\u{f86}', '\u{f87}'),
-        ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'),
-        ('\u{102d}', '\u{1030}'), ('\u{1032}', '\u{1037}'), ('\u{1039}',
-        '\u{103a}'), ('\u{103d}', '\u{103e}'), ('\u{1058}', '\u{1059}'),
-        ('\u{105e}', '\u{1060}'), ('\u{1071}', '\u{1074}'), ('\u{1082}',
-        '\u{1082}'), ('\u{1085}', '\u{1086}'), ('\u{108d}', '\u{108d}'),
-        ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}',
-        '\u{1714}'), ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'),
-        ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), ('\u{17b7}',
-        '\u{17bd}'), ('\u{17c6}', '\u{17c6}'), ('\u{17c9}', '\u{17d3}'),
-        ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), ('\u{1885}',
-        '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'),
-        ('\u{1927}', '\u{1928}'), ('\u{1932}', '\u{1932}'), ('\u{1939}',
-        '\u{193b}'), ('\u{1a17}', '\u{1a18}'), ('\u{1a1b}', '\u{1a1b}'),
-        ('\u{1a56}', '\u{1a56}'), ('\u{1a58}', '\u{1a5e}'), ('\u{1a60}',
-        '\u{1a60}'), ('\u{1a62}', '\u{1a62}'), ('\u{1a65}', '\u{1a6c}'),
-        ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}',
-        '\u{1abe}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'),
-        ('\u{1b36}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), ('\u{1b42}',
-        '\u{1b42}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'),
-        ('\u{1ba2}', '\u{1ba5}'), ('\u{1ba8}', '\u{1ba9}'), ('\u{1bab}',
-        '\u{1bad}'), ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'),
-        ('\u{1bed}', '\u{1bed}'), ('\u{1bef}', '\u{1bf1}'), ('\u{1c2c}',
-        '\u{1c33}'), ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'),
-        ('\u{1cd4}', '\u{1ce0}'), ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}',
-        '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'),
-        ('\u{1dc0}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'), ('\u{200c}',
-        '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'),
-        ('\u{2d7f}', '\u{2d7f}'), ('\u{2de0}', '\u{2dff}'), ('\u{302a}',
-        '\u{302f}'), ('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a672}'),
-        ('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'), ('\u{a6f0}',
-        '\u{a6f1}'), ('\u{a802}', '\u{a802}'), ('\u{a806}', '\u{a806}'),
-        ('\u{a80b}', '\u{a80b}'), ('\u{a825}', '\u{a826}'), ('\u{a8c4}',
-        '\u{a8c5}'), ('\u{a8e0}', '\u{a8f1}'), ('\u{a926}', '\u{a92d}'),
-        ('\u{a947}', '\u{a951}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}',
-        '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bc}'),
-        ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}',
-        '\u{aa32}'), ('\u{aa35}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'),
-        ('\u{aa4c}', '\u{aa4c}'), ('\u{aa7c}', '\u{aa7c}'), ('\u{aab0}',
-        '\u{aab0}'), ('\u{aab2}', '\u{aab4}'), ('\u{aab7}', '\u{aab8}'),
-        ('\u{aabe}', '\u{aabf}'), ('\u{aac1}', '\u{aac1}'), ('\u{aaec}',
-        '\u{aaed}'), ('\u{aaf6}', '\u{aaf6}'), ('\u{abe5}', '\u{abe5}'),
-        ('\u{abe8}', '\u{abe8}'), ('\u{abed}', '\u{abed}'), ('\u{fb1e}',
-        '\u{fb1e}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'),
-        ('\u{ff9e}', '\u{ff9f}'), ('\u{101fd}', '\u{101fd}'), ('\u{102e0}',
-        '\u{102e0}'), ('\u{10376}', '\u{1037a}'), ('\u{10a01}', '\u{10a03}'),
-        ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}',
-        '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'),
-        ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), ('\u{1107f}',
-        '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'),
-        ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}',
-        '\u{11134}'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'),
-        ('\u{111b6}', '\u{111be}'), ('\u{111ca}', '\u{111cc}'), ('\u{1122f}',
-        '\u{11231}'), ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'),
-        ('\u{1123e}', '\u{1123e}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}',
-        '\u{112ea}'), ('\u{11300}', '\u{11301}'), ('\u{1133c}', '\u{1133c}'),
-        ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'), ('\u{11357}',
-        '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
-        ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}',
-        '\u{11446}'), ('\u{114b0}', '\u{114b0}'), ('\u{114b3}', '\u{114b8}'),
-        ('\u{114ba}', '\u{114ba}'), ('\u{114bd}', '\u{114bd}'), ('\u{114bf}',
-        '\u{114c0}'), ('\u{114c2}', '\u{114c3}'), ('\u{115af}', '\u{115af}'),
-        ('\u{115b2}', '\u{115b5}'), ('\u{115bc}', '\u{115bd}'), ('\u{115bf}',
-        '\u{115c0}'), ('\u{115dc}', '\u{115dd}'), ('\u{11633}', '\u{1163a}'),
-        ('\u{1163d}', '\u{1163d}'), ('\u{1163f}', '\u{11640}'), ('\u{116ab}',
-        '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'),
-        ('\u{116b7}', '\u{116b7}'), ('\u{1171d}', '\u{1171f}'), ('\u{11722}',
-        '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{11a01}', '\u{11a06}'),
-        ('\u{11a09}', '\u{11a0a}'), ('\u{11a33}', '\u{11a38}'), ('\u{11a3b}',
-        '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'), ('\u{11a51}', '\u{11a56}'),
-        ('\u{11a59}', '\u{11a5b}'), ('\u{11a8a}', '\u{11a96}'), ('\u{11a98}',
-        '\u{11a99}'), ('\u{11c30}', '\u{11c36}'), ('\u{11c38}', '\u{11c3d}'),
-        ('\u{11c3f}', '\u{11c3f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11caa}',
-        '\u{11cb0}'), ('\u{11cb2}', '\u{11cb3}'), ('\u{11cb5}', '\u{11cb6}'),
-        ('\u{11d31}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}',
-        '\u{11d3d}'), ('\u{11d3f}', '\u{11d45}'), ('\u{11d47}', '\u{11d47}'),
-        ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f8f}',
-        '\u{16f92}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d165}'),
-        ('\u{1d167}', '\u{1d169}'), ('\u{1d16e}', '\u{1d172}'), ('\u{1d17b}',
-        '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
-        ('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}',
-        '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'),
-        ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}',
-        '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
-        ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('\u{1e8d0}',
-        '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'),
-        ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const Lowercase_table: &'static [(char, char)] = &[
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{ba}', '\u{ba}'), ('\u{df}', '\u{f6}'), ('\u{f8}', '\u{ff}'),
-        ('\u{101}', '\u{101}'), ('\u{103}', '\u{103}'), ('\u{105}', '\u{105}'),
-        ('\u{107}', '\u{107}'), ('\u{109}', '\u{109}'), ('\u{10b}', '\u{10b}'),
-        ('\u{10d}', '\u{10d}'), ('\u{10f}', '\u{10f}'), ('\u{111}', '\u{111}'),
-        ('\u{113}', '\u{113}'), ('\u{115}', '\u{115}'), ('\u{117}', '\u{117}'),
-        ('\u{119}', '\u{119}'), ('\u{11b}', '\u{11b}'), ('\u{11d}', '\u{11d}'),
-        ('\u{11f}', '\u{11f}'), ('\u{121}', '\u{121}'), ('\u{123}', '\u{123}'),
-        ('\u{125}', '\u{125}'), ('\u{127}', '\u{127}'), ('\u{129}', '\u{129}'),
-        ('\u{12b}', '\u{12b}'), ('\u{12d}', '\u{12d}'), ('\u{12f}', '\u{12f}'),
-        ('\u{131}', '\u{131}'), ('\u{133}', '\u{133}'), ('\u{135}', '\u{135}'),
-        ('\u{137}', '\u{138}'), ('\u{13a}', '\u{13a}'), ('\u{13c}', '\u{13c}'),
-        ('\u{13e}', '\u{13e}'), ('\u{140}', '\u{140}'), ('\u{142}', '\u{142}'),
-        ('\u{144}', '\u{144}'), ('\u{146}', '\u{146}'), ('\u{148}', '\u{149}'),
-        ('\u{14b}', '\u{14b}'), ('\u{14d}', '\u{14d}'), ('\u{14f}', '\u{14f}'),
-        ('\u{151}', '\u{151}'), ('\u{153}', '\u{153}'), ('\u{155}', '\u{155}'),
-        ('\u{157}', '\u{157}'), ('\u{159}', '\u{159}'), ('\u{15b}', '\u{15b}'),
-        ('\u{15d}', '\u{15d}'), ('\u{15f}', '\u{15f}'), ('\u{161}', '\u{161}'),
-        ('\u{163}', '\u{163}'), ('\u{165}', '\u{165}'), ('\u{167}', '\u{167}'),
-        ('\u{169}', '\u{169}'), ('\u{16b}', '\u{16b}'), ('\u{16d}', '\u{16d}'),
-        ('\u{16f}', '\u{16f}'), ('\u{171}', '\u{171}'), ('\u{173}', '\u{173}'),
-        ('\u{175}', '\u{175}'), ('\u{177}', '\u{177}'), ('\u{17a}', '\u{17a}'),
-        ('\u{17c}', '\u{17c}'), ('\u{17e}', '\u{180}'), ('\u{183}', '\u{183}'),
-        ('\u{185}', '\u{185}'), ('\u{188}', '\u{188}'), ('\u{18c}', '\u{18d}'),
-        ('\u{192}', '\u{192}'), ('\u{195}', '\u{195}'), ('\u{199}', '\u{19b}'),
-        ('\u{19e}', '\u{19e}'), ('\u{1a1}', '\u{1a1}'), ('\u{1a3}', '\u{1a3}'),
-        ('\u{1a5}', '\u{1a5}'), ('\u{1a8}', '\u{1a8}'), ('\u{1aa}', '\u{1ab}'),
-        ('\u{1ad}', '\u{1ad}'), ('\u{1b0}', '\u{1b0}'), ('\u{1b4}', '\u{1b4}'),
-        ('\u{1b6}', '\u{1b6}'), ('\u{1b9}', '\u{1ba}'), ('\u{1bd}', '\u{1bf}'),
-        ('\u{1c6}', '\u{1c6}'), ('\u{1c9}', '\u{1c9}'), ('\u{1cc}', '\u{1cc}'),
-        ('\u{1ce}', '\u{1ce}'), ('\u{1d0}', '\u{1d0}'), ('\u{1d2}', '\u{1d2}'),
-        ('\u{1d4}', '\u{1d4}'), ('\u{1d6}', '\u{1d6}'), ('\u{1d8}', '\u{1d8}'),
-        ('\u{1da}', '\u{1da}'), ('\u{1dc}', '\u{1dd}'), ('\u{1df}', '\u{1df}'),
-        ('\u{1e1}', '\u{1e1}'), ('\u{1e3}', '\u{1e3}'), ('\u{1e5}', '\u{1e5}'),
-        ('\u{1e7}', '\u{1e7}'), ('\u{1e9}', '\u{1e9}'), ('\u{1eb}', '\u{1eb}'),
-        ('\u{1ed}', '\u{1ed}'), ('\u{1ef}', '\u{1f0}'), ('\u{1f3}', '\u{1f3}'),
-        ('\u{1f5}', '\u{1f5}'), ('\u{1f9}', '\u{1f9}'), ('\u{1fb}', '\u{1fb}'),
-        ('\u{1fd}', '\u{1fd}'), ('\u{1ff}', '\u{1ff}'), ('\u{201}', '\u{201}'),
-        ('\u{203}', '\u{203}'), ('\u{205}', '\u{205}'), ('\u{207}', '\u{207}'),
-        ('\u{209}', '\u{209}'), ('\u{20b}', '\u{20b}'), ('\u{20d}', '\u{20d}'),
-        ('\u{20f}', '\u{20f}'), ('\u{211}', '\u{211}'), ('\u{213}', '\u{213}'),
-        ('\u{215}', '\u{215}'), ('\u{217}', '\u{217}'), ('\u{219}', '\u{219}'),
-        ('\u{21b}', '\u{21b}'), ('\u{21d}', '\u{21d}'), ('\u{21f}', '\u{21f}'),
-        ('\u{221}', '\u{221}'), ('\u{223}', '\u{223}'), ('\u{225}', '\u{225}'),
-        ('\u{227}', '\u{227}'), ('\u{229}', '\u{229}'), ('\u{22b}', '\u{22b}'),
-        ('\u{22d}', '\u{22d}'), ('\u{22f}', '\u{22f}'), ('\u{231}', '\u{231}'),
-        ('\u{233}', '\u{239}'), ('\u{23c}', '\u{23c}'), ('\u{23f}', '\u{240}'),
-        ('\u{242}', '\u{242}'), ('\u{247}', '\u{247}'), ('\u{249}', '\u{249}'),
-        ('\u{24b}', '\u{24b}'), ('\u{24d}', '\u{24d}'), ('\u{24f}', '\u{293}'),
-        ('\u{295}', '\u{2b8}'), ('\u{2c0}', '\u{2c1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{345}', '\u{345}'), ('\u{371}', '\u{371}'), ('\u{373}', '\u{373}'),
-        ('\u{377}', '\u{377}'), ('\u{37a}', '\u{37d}'), ('\u{390}', '\u{390}'),
-        ('\u{3ac}', '\u{3ce}'), ('\u{3d0}', '\u{3d1}'), ('\u{3d5}', '\u{3d7}'),
-        ('\u{3d9}', '\u{3d9}'), ('\u{3db}', '\u{3db}'), ('\u{3dd}', '\u{3dd}'),
-        ('\u{3df}', '\u{3df}'), ('\u{3e1}', '\u{3e1}'), ('\u{3e3}', '\u{3e3}'),
-        ('\u{3e5}', '\u{3e5}'), ('\u{3e7}', '\u{3e7}'), ('\u{3e9}', '\u{3e9}'),
-        ('\u{3eb}', '\u{3eb}'), ('\u{3ed}', '\u{3ed}'), ('\u{3ef}', '\u{3f3}'),
-        ('\u{3f5}', '\u{3f5}'), ('\u{3f8}', '\u{3f8}'), ('\u{3fb}', '\u{3fc}'),
-        ('\u{430}', '\u{45f}'), ('\u{461}', '\u{461}'), ('\u{463}', '\u{463}'),
-        ('\u{465}', '\u{465}'), ('\u{467}', '\u{467}'), ('\u{469}', '\u{469}'),
-        ('\u{46b}', '\u{46b}'), ('\u{46d}', '\u{46d}'), ('\u{46f}', '\u{46f}'),
-        ('\u{471}', '\u{471}'), ('\u{473}', '\u{473}'), ('\u{475}', '\u{475}'),
-        ('\u{477}', '\u{477}'), ('\u{479}', '\u{479}'), ('\u{47b}', '\u{47b}'),
-        ('\u{47d}', '\u{47d}'), ('\u{47f}', '\u{47f}'), ('\u{481}', '\u{481}'),
-        ('\u{48b}', '\u{48b}'), ('\u{48d}', '\u{48d}'), ('\u{48f}', '\u{48f}'),
-        ('\u{491}', '\u{491}'), ('\u{493}', '\u{493}'), ('\u{495}', '\u{495}'),
-        ('\u{497}', '\u{497}'), ('\u{499}', '\u{499}'), ('\u{49b}', '\u{49b}'),
-        ('\u{49d}', '\u{49d}'), ('\u{49f}', '\u{49f}'), ('\u{4a1}', '\u{4a1}'),
-        ('\u{4a3}', '\u{4a3}'), ('\u{4a5}', '\u{4a5}'), ('\u{4a7}', '\u{4a7}'),
-        ('\u{4a9}', '\u{4a9}'), ('\u{4ab}', '\u{4ab}'), ('\u{4ad}', '\u{4ad}'),
-        ('\u{4af}', '\u{4af}'), ('\u{4b1}', '\u{4b1}'), ('\u{4b3}', '\u{4b3}'),
-        ('\u{4b5}', '\u{4b5}'), ('\u{4b7}', '\u{4b7}'), ('\u{4b9}', '\u{4b9}'),
-        ('\u{4bb}', '\u{4bb}'), ('\u{4bd}', '\u{4bd}'), ('\u{4bf}', '\u{4bf}'),
-        ('\u{4c2}', '\u{4c2}'), ('\u{4c4}', '\u{4c4}'), ('\u{4c6}', '\u{4c6}'),
-        ('\u{4c8}', '\u{4c8}'), ('\u{4ca}', '\u{4ca}'), ('\u{4cc}', '\u{4cc}'),
-        ('\u{4ce}', '\u{4cf}'), ('\u{4d1}', '\u{4d1}'), ('\u{4d3}', '\u{4d3}'),
-        ('\u{4d5}', '\u{4d5}'), ('\u{4d7}', '\u{4d7}'), ('\u{4d9}', '\u{4d9}'),
-        ('\u{4db}', '\u{4db}'), ('\u{4dd}', '\u{4dd}'), ('\u{4df}', '\u{4df}'),
-        ('\u{4e1}', '\u{4e1}'), ('\u{4e3}', '\u{4e3}'), ('\u{4e5}', '\u{4e5}'),
-        ('\u{4e7}', '\u{4e7}'), ('\u{4e9}', '\u{4e9}'), ('\u{4eb}', '\u{4eb}'),
-        ('\u{4ed}', '\u{4ed}'), ('\u{4ef}', '\u{4ef}'), ('\u{4f1}', '\u{4f1}'),
-        ('\u{4f3}', '\u{4f3}'), ('\u{4f5}', '\u{4f5}'), ('\u{4f7}', '\u{4f7}'),
-        ('\u{4f9}', '\u{4f9}'), ('\u{4fb}', '\u{4fb}'), ('\u{4fd}', '\u{4fd}'),
-        ('\u{4ff}', '\u{4ff}'), ('\u{501}', '\u{501}'), ('\u{503}', '\u{503}'),
-        ('\u{505}', '\u{505}'), ('\u{507}', '\u{507}'), ('\u{509}', '\u{509}'),
-        ('\u{50b}', '\u{50b}'), ('\u{50d}', '\u{50d}'), ('\u{50f}', '\u{50f}'),
-        ('\u{511}', '\u{511}'), ('\u{513}', '\u{513}'), ('\u{515}', '\u{515}'),
-        ('\u{517}', '\u{517}'), ('\u{519}', '\u{519}'), ('\u{51b}', '\u{51b}'),
-        ('\u{51d}', '\u{51d}'), ('\u{51f}', '\u{51f}'), ('\u{521}', '\u{521}'),
-        ('\u{523}', '\u{523}'), ('\u{525}', '\u{525}'), ('\u{527}', '\u{527}'),
-        ('\u{529}', '\u{529}'), ('\u{52b}', '\u{52b}'), ('\u{52d}', '\u{52d}'),
-        ('\u{52f}', '\u{52f}'), ('\u{561}', '\u{587}'), ('\u{13f8}',
-        '\u{13fd}'), ('\u{1c80}', '\u{1c88}'), ('\u{1d00}', '\u{1dbf}'),
-        ('\u{1e01}', '\u{1e01}'), ('\u{1e03}', '\u{1e03}'), ('\u{1e05}',
-        '\u{1e05}'), ('\u{1e07}', '\u{1e07}'), ('\u{1e09}', '\u{1e09}'),
-        ('\u{1e0b}', '\u{1e0b}'), ('\u{1e0d}', '\u{1e0d}'), ('\u{1e0f}',
-        '\u{1e0f}'), ('\u{1e11}', '\u{1e11}'), ('\u{1e13}', '\u{1e13}'),
-        ('\u{1e15}', '\u{1e15}'), ('\u{1e17}', '\u{1e17}'), ('\u{1e19}',
-        '\u{1e19}'), ('\u{1e1b}', '\u{1e1b}'), ('\u{1e1d}', '\u{1e1d}'),
-        ('\u{1e1f}', '\u{1e1f}'), ('\u{1e21}', '\u{1e21}'), ('\u{1e23}',
-        '\u{1e23}'), ('\u{1e25}', '\u{1e25}'), ('\u{1e27}', '\u{1e27}'),
-        ('\u{1e29}', '\u{1e29}'), ('\u{1e2b}', '\u{1e2b}'), ('\u{1e2d}',
-        '\u{1e2d}'), ('\u{1e2f}', '\u{1e2f}'), ('\u{1e31}', '\u{1e31}'),
-        ('\u{1e33}', '\u{1e33}'), ('\u{1e35}', '\u{1e35}'), ('\u{1e37}',
-        '\u{1e37}'), ('\u{1e39}', '\u{1e39}'), ('\u{1e3b}', '\u{1e3b}'),
-        ('\u{1e3d}', '\u{1e3d}'), ('\u{1e3f}', '\u{1e3f}'), ('\u{1e41}',
-        '\u{1e41}'), ('\u{1e43}', '\u{1e43}'), ('\u{1e45}', '\u{1e45}'),
-        ('\u{1e47}', '\u{1e47}'), ('\u{1e49}', '\u{1e49}'), ('\u{1e4b}',
-        '\u{1e4b}'), ('\u{1e4d}', '\u{1e4d}'), ('\u{1e4f}', '\u{1e4f}'),
-        ('\u{1e51}', '\u{1e51}'), ('\u{1e53}', '\u{1e53}'), ('\u{1e55}',
-        '\u{1e55}'), ('\u{1e57}', '\u{1e57}'), ('\u{1e59}', '\u{1e59}'),
-        ('\u{1e5b}', '\u{1e5b}'), ('\u{1e5d}', '\u{1e5d}'), ('\u{1e5f}',
-        '\u{1e5f}'), ('\u{1e61}', '\u{1e61}'), ('\u{1e63}', '\u{1e63}'),
-        ('\u{1e65}', '\u{1e65}'), ('\u{1e67}', '\u{1e67}'), ('\u{1e69}',
-        '\u{1e69}'), ('\u{1e6b}', '\u{1e6b}'), ('\u{1e6d}', '\u{1e6d}'),
-        ('\u{1e6f}', '\u{1e6f}'), ('\u{1e71}', '\u{1e71}'), ('\u{1e73}',
-        '\u{1e73}'), ('\u{1e75}', '\u{1e75}'), ('\u{1e77}', '\u{1e77}'),
-        ('\u{1e79}', '\u{1e79}'), ('\u{1e7b}', '\u{1e7b}'), ('\u{1e7d}',
-        '\u{1e7d}'), ('\u{1e7f}', '\u{1e7f}'), ('\u{1e81}', '\u{1e81}'),
-        ('\u{1e83}', '\u{1e83}'), ('\u{1e85}', '\u{1e85}'), ('\u{1e87}',
-        '\u{1e87}'), ('\u{1e89}', '\u{1e89}'), ('\u{1e8b}', '\u{1e8b}'),
-        ('\u{1e8d}', '\u{1e8d}'), ('\u{1e8f}', '\u{1e8f}'), ('\u{1e91}',
-        '\u{1e91}'), ('\u{1e93}', '\u{1e93}'), ('\u{1e95}', '\u{1e9d}'),
-        ('\u{1e9f}', '\u{1e9f}'), ('\u{1ea1}', '\u{1ea1}'), ('\u{1ea3}',
-        '\u{1ea3}'), ('\u{1ea5}', '\u{1ea5}'), ('\u{1ea7}', '\u{1ea7}'),
-        ('\u{1ea9}', '\u{1ea9}'), ('\u{1eab}', '\u{1eab}'), ('\u{1ead}',
-        '\u{1ead}'), ('\u{1eaf}', '\u{1eaf}'), ('\u{1eb1}', '\u{1eb1}'),
-        ('\u{1eb3}', '\u{1eb3}'), ('\u{1eb5}', '\u{1eb5}'), ('\u{1eb7}',
-        '\u{1eb7}'), ('\u{1eb9}', '\u{1eb9}'), ('\u{1ebb}', '\u{1ebb}'),
-        ('\u{1ebd}', '\u{1ebd}'), ('\u{1ebf}', '\u{1ebf}'), ('\u{1ec1}',
-        '\u{1ec1}'), ('\u{1ec3}', '\u{1ec3}'), ('\u{1ec5}', '\u{1ec5}'),
-        ('\u{1ec7}', '\u{1ec7}'), ('\u{1ec9}', '\u{1ec9}'), ('\u{1ecb}',
-        '\u{1ecb}'), ('\u{1ecd}', '\u{1ecd}'), ('\u{1ecf}', '\u{1ecf}'),
-        ('\u{1ed1}', '\u{1ed1}'), ('\u{1ed3}', '\u{1ed3}'), ('\u{1ed5}',
-        '\u{1ed5}'), ('\u{1ed7}', '\u{1ed7}'), ('\u{1ed9}', '\u{1ed9}'),
-        ('\u{1edb}', '\u{1edb}'), ('\u{1edd}', '\u{1edd}'), ('\u{1edf}',
-        '\u{1edf}'), ('\u{1ee1}', '\u{1ee1}'), ('\u{1ee3}', '\u{1ee3}'),
-        ('\u{1ee5}', '\u{1ee5}'), ('\u{1ee7}', '\u{1ee7}'), ('\u{1ee9}',
-        '\u{1ee9}'), ('\u{1eeb}', '\u{1eeb}'), ('\u{1eed}', '\u{1eed}'),
-        ('\u{1eef}', '\u{1eef}'), ('\u{1ef1}', '\u{1ef1}'), ('\u{1ef3}',
-        '\u{1ef3}'), ('\u{1ef5}', '\u{1ef5}'), ('\u{1ef7}', '\u{1ef7}'),
-        ('\u{1ef9}', '\u{1ef9}'), ('\u{1efb}', '\u{1efb}'), ('\u{1efd}',
-        '\u{1efd}'), ('\u{1eff}', '\u{1f07}'), ('\u{1f10}', '\u{1f15}'),
-        ('\u{1f20}', '\u{1f27}'), ('\u{1f30}', '\u{1f37}'), ('\u{1f40}',
-        '\u{1f45}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f60}', '\u{1f67}'),
-        ('\u{1f70}', '\u{1f7d}'), ('\u{1f80}', '\u{1f87}'), ('\u{1f90}',
-        '\u{1f97}'), ('\u{1fa0}', '\u{1fa7}'), ('\u{1fb0}', '\u{1fb4}'),
-        ('\u{1fb6}', '\u{1fb7}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}',
-        '\u{1fc4}'), ('\u{1fc6}', '\u{1fc7}'), ('\u{1fd0}', '\u{1fd3}'),
-        ('\u{1fd6}', '\u{1fd7}'), ('\u{1fe0}', '\u{1fe7}'), ('\u{1ff2}',
-        '\u{1ff4}'), ('\u{1ff6}', '\u{1ff7}'), ('\u{2071}', '\u{2071}'),
-        ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{210a}',
-        '\u{210a}'), ('\u{210e}', '\u{210f}'), ('\u{2113}', '\u{2113}'),
-        ('\u{212f}', '\u{212f}'), ('\u{2134}', '\u{2134}'), ('\u{2139}',
-        '\u{2139}'), ('\u{213c}', '\u{213d}'), ('\u{2146}', '\u{2149}'),
-        ('\u{214e}', '\u{214e}'), ('\u{2170}', '\u{217f}'), ('\u{2184}',
-        '\u{2184}'), ('\u{24d0}', '\u{24e9}'), ('\u{2c30}', '\u{2c5e}'),
-        ('\u{2c61}', '\u{2c61}'), ('\u{2c65}', '\u{2c66}'), ('\u{2c68}',
-        '\u{2c68}'), ('\u{2c6a}', '\u{2c6a}'), ('\u{2c6c}', '\u{2c6c}'),
-        ('\u{2c71}', '\u{2c71}'), ('\u{2c73}', '\u{2c74}'), ('\u{2c76}',
-        '\u{2c7d}'), ('\u{2c81}', '\u{2c81}'), ('\u{2c83}', '\u{2c83}'),
-        ('\u{2c85}', '\u{2c85}'), ('\u{2c87}', '\u{2c87}'), ('\u{2c89}',
-        '\u{2c89}'), ('\u{2c8b}', '\u{2c8b}'), ('\u{2c8d}', '\u{2c8d}'),
-        ('\u{2c8f}', '\u{2c8f}'), ('\u{2c91}', '\u{2c91}'), ('\u{2c93}',
-        '\u{2c93}'), ('\u{2c95}', '\u{2c95}'), ('\u{2c97}', '\u{2c97}'),
-        ('\u{2c99}', '\u{2c99}'), ('\u{2c9b}', '\u{2c9b}'), ('\u{2c9d}',
-        '\u{2c9d}'), ('\u{2c9f}', '\u{2c9f}'), ('\u{2ca1}', '\u{2ca1}'),
-        ('\u{2ca3}', '\u{2ca3}'), ('\u{2ca5}', '\u{2ca5}'), ('\u{2ca7}',
-        '\u{2ca7}'), ('\u{2ca9}', '\u{2ca9}'), ('\u{2cab}', '\u{2cab}'),
-        ('\u{2cad}', '\u{2cad}'), ('\u{2caf}', '\u{2caf}'), ('\u{2cb1}',
-        '\u{2cb1}'), ('\u{2cb3}', '\u{2cb3}'), ('\u{2cb5}', '\u{2cb5}'),
-        ('\u{2cb7}', '\u{2cb7}'), ('\u{2cb9}', '\u{2cb9}'), ('\u{2cbb}',
-        '\u{2cbb}'), ('\u{2cbd}', '\u{2cbd}'), ('\u{2cbf}', '\u{2cbf}'),
-        ('\u{2cc1}', '\u{2cc1}'), ('\u{2cc3}', '\u{2cc3}'), ('\u{2cc5}',
-        '\u{2cc5}'), ('\u{2cc7}', '\u{2cc7}'), ('\u{2cc9}', '\u{2cc9}'),
-        ('\u{2ccb}', '\u{2ccb}'), ('\u{2ccd}', '\u{2ccd}'), ('\u{2ccf}',
-        '\u{2ccf}'), ('\u{2cd1}', '\u{2cd1}'), ('\u{2cd3}', '\u{2cd3}'),
-        ('\u{2cd5}', '\u{2cd5}'), ('\u{2cd7}', '\u{2cd7}'), ('\u{2cd9}',
-        '\u{2cd9}'), ('\u{2cdb}', '\u{2cdb}'), ('\u{2cdd}', '\u{2cdd}'),
-        ('\u{2cdf}', '\u{2cdf}'), ('\u{2ce1}', '\u{2ce1}'), ('\u{2ce3}',
-        '\u{2ce4}'), ('\u{2cec}', '\u{2cec}'), ('\u{2cee}', '\u{2cee}'),
-        ('\u{2cf3}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), ('\u{2d27}',
-        '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{a641}', '\u{a641}'),
-        ('\u{a643}', '\u{a643}'), ('\u{a645}', '\u{a645}'), ('\u{a647}',
-        '\u{a647}'), ('\u{a649}', '\u{a649}'), ('\u{a64b}', '\u{a64b}'),
-        ('\u{a64d}', '\u{a64d}'), ('\u{a64f}', '\u{a64f}'), ('\u{a651}',
-        '\u{a651}'), ('\u{a653}', '\u{a653}'), ('\u{a655}', '\u{a655}'),
-        ('\u{a657}', '\u{a657}'), ('\u{a659}', '\u{a659}'), ('\u{a65b}',
-        '\u{a65b}'), ('\u{a65d}', '\u{a65d}'), ('\u{a65f}', '\u{a65f}'),
-        ('\u{a661}', '\u{a661}'), ('\u{a663}', '\u{a663}'), ('\u{a665}',
-        '\u{a665}'), ('\u{a667}', '\u{a667}'), ('\u{a669}', '\u{a669}'),
-        ('\u{a66b}', '\u{a66b}'), ('\u{a66d}', '\u{a66d}'), ('\u{a681}',
-        '\u{a681}'), ('\u{a683}', '\u{a683}'), ('\u{a685}', '\u{a685}'),
-        ('\u{a687}', '\u{a687}'), ('\u{a689}', '\u{a689}'), ('\u{a68b}',
-        '\u{a68b}'), ('\u{a68d}', '\u{a68d}'), ('\u{a68f}', '\u{a68f}'),
-        ('\u{a691}', '\u{a691}'), ('\u{a693}', '\u{a693}'), ('\u{a695}',
-        '\u{a695}'), ('\u{a697}', '\u{a697}'), ('\u{a699}', '\u{a699}'),
-        ('\u{a69b}', '\u{a69d}'), ('\u{a723}', '\u{a723}'), ('\u{a725}',
-        '\u{a725}'), ('\u{a727}', '\u{a727}'), ('\u{a729}', '\u{a729}'),
-        ('\u{a72b}', '\u{a72b}'), ('\u{a72d}', '\u{a72d}'), ('\u{a72f}',
-        '\u{a731}'), ('\u{a733}', '\u{a733}'), ('\u{a735}', '\u{a735}'),
-        ('\u{a737}', '\u{a737}'), ('\u{a739}', '\u{a739}'), ('\u{a73b}',
-        '\u{a73b}'), ('\u{a73d}', '\u{a73d}'), ('\u{a73f}', '\u{a73f}'),
-        ('\u{a741}', '\u{a741}'), ('\u{a743}', '\u{a743}'), ('\u{a745}',
-        '\u{a745}'), ('\u{a747}', '\u{a747}'), ('\u{a749}', '\u{a749}'),
-        ('\u{a74b}', '\u{a74b}'), ('\u{a74d}', '\u{a74d}'), ('\u{a74f}',
-        '\u{a74f}'), ('\u{a751}', '\u{a751}'), ('\u{a753}', '\u{a753}'),
-        ('\u{a755}', '\u{a755}'), ('\u{a757}', '\u{a757}'), ('\u{a759}',
-        '\u{a759}'), ('\u{a75b}', '\u{a75b}'), ('\u{a75d}', '\u{a75d}'),
-        ('\u{a75f}', '\u{a75f}'), ('\u{a761}', '\u{a761}'), ('\u{a763}',
-        '\u{a763}'), ('\u{a765}', '\u{a765}'), ('\u{a767}', '\u{a767}'),
-        ('\u{a769}', '\u{a769}'), ('\u{a76b}', '\u{a76b}'), ('\u{a76d}',
-        '\u{a76d}'), ('\u{a76f}', '\u{a778}'), ('\u{a77a}', '\u{a77a}'),
-        ('\u{a77c}', '\u{a77c}'), ('\u{a77f}', '\u{a77f}'), ('\u{a781}',
-        '\u{a781}'), ('\u{a783}', '\u{a783}'), ('\u{a785}', '\u{a785}'),
-        ('\u{a787}', '\u{a787}'), ('\u{a78c}', '\u{a78c}'), ('\u{a78e}',
-        '\u{a78e}'), ('\u{a791}', '\u{a791}'), ('\u{a793}', '\u{a795}'),
-        ('\u{a797}', '\u{a797}'), ('\u{a799}', '\u{a799}'), ('\u{a79b}',
-        '\u{a79b}'), ('\u{a79d}', '\u{a79d}'), ('\u{a79f}', '\u{a79f}'),
-        ('\u{a7a1}', '\u{a7a1}'), ('\u{a7a3}', '\u{a7a3}'), ('\u{a7a5}',
-        '\u{a7a5}'), ('\u{a7a7}', '\u{a7a7}'), ('\u{a7a9}', '\u{a7a9}'),
-        ('\u{a7b5}', '\u{a7b5}'), ('\u{a7b7}', '\u{a7b7}'), ('\u{a7f8}',
-        '\u{a7fa}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'),
-        ('\u{ab70}', '\u{abbf}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}',
-        '\u{fb17}'), ('\u{ff41}', '\u{ff5a}'), ('\u{10428}', '\u{1044f}'),
-        ('\u{104d8}', '\u{104fb}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{118c0}',
-        '\u{118df}'), ('\u{1d41a}', '\u{1d433}'), ('\u{1d44e}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d467}'), ('\u{1d482}', '\u{1d49b}'), ('\u{1d4b6}',
-        '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'),
-        ('\u{1d4c5}', '\u{1d4cf}'), ('\u{1d4ea}', '\u{1d503}'), ('\u{1d51e}',
-        '\u{1d537}'), ('\u{1d552}', '\u{1d56b}'), ('\u{1d586}', '\u{1d59f}'),
-        ('\u{1d5ba}', '\u{1d5d3}'), ('\u{1d5ee}', '\u{1d607}'), ('\u{1d622}',
-        '\u{1d63b}'), ('\u{1d656}', '\u{1d66f}'), ('\u{1d68a}', '\u{1d6a5}'),
-        ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6e1}'), ('\u{1d6fc}',
-        '\u{1d714}'), ('\u{1d716}', '\u{1d71b}'), ('\u{1d736}', '\u{1d74e}'),
-        ('\u{1d750}', '\u{1d755}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}',
-        '\u{1d78f}'), ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7c9}'),
-        ('\u{1d7cb}', '\u{1d7cb}'), ('\u{1e922}', '\u{1e943}')
-    ];
-
-    pub const Uppercase_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{de}'),
-        ('\u{100}', '\u{100}'), ('\u{102}', '\u{102}'), ('\u{104}', '\u{104}'),
-        ('\u{106}', '\u{106}'), ('\u{108}', '\u{108}'), ('\u{10a}', '\u{10a}'),
-        ('\u{10c}', '\u{10c}'), ('\u{10e}', '\u{10e}'), ('\u{110}', '\u{110}'),
-        ('\u{112}', '\u{112}'), ('\u{114}', '\u{114}'), ('\u{116}', '\u{116}'),
-        ('\u{118}', '\u{118}'), ('\u{11a}', '\u{11a}'), ('\u{11c}', '\u{11c}'),
-        ('\u{11e}', '\u{11e}'), ('\u{120}', '\u{120}'), ('\u{122}', '\u{122}'),
-        ('\u{124}', '\u{124}'), ('\u{126}', '\u{126}'), ('\u{128}', '\u{128}'),
-        ('\u{12a}', '\u{12a}'), ('\u{12c}', '\u{12c}'), ('\u{12e}', '\u{12e}'),
-        ('\u{130}', '\u{130}'), ('\u{132}', '\u{132}'), ('\u{134}', '\u{134}'),
-        ('\u{136}', '\u{136}'), ('\u{139}', '\u{139}'), ('\u{13b}', '\u{13b}'),
-        ('\u{13d}', '\u{13d}'), ('\u{13f}', '\u{13f}'), ('\u{141}', '\u{141}'),
-        ('\u{143}', '\u{143}'), ('\u{145}', '\u{145}'), ('\u{147}', '\u{147}'),
-        ('\u{14a}', '\u{14a}'), ('\u{14c}', '\u{14c}'), ('\u{14e}', '\u{14e}'),
-        ('\u{150}', '\u{150}'), ('\u{152}', '\u{152}'), ('\u{154}', '\u{154}'),
-        ('\u{156}', '\u{156}'), ('\u{158}', '\u{158}'), ('\u{15a}', '\u{15a}'),
-        ('\u{15c}', '\u{15c}'), ('\u{15e}', '\u{15e}'), ('\u{160}', '\u{160}'),
-        ('\u{162}', '\u{162}'), ('\u{164}', '\u{164}'), ('\u{166}', '\u{166}'),
-        ('\u{168}', '\u{168}'), ('\u{16a}', '\u{16a}'), ('\u{16c}', '\u{16c}'),
-        ('\u{16e}', '\u{16e}'), ('\u{170}', '\u{170}'), ('\u{172}', '\u{172}'),
-        ('\u{174}', '\u{174}'), ('\u{176}', '\u{176}'), ('\u{178}', '\u{179}'),
-        ('\u{17b}', '\u{17b}'), ('\u{17d}', '\u{17d}'), ('\u{181}', '\u{182}'),
-        ('\u{184}', '\u{184}'), ('\u{186}', '\u{187}'), ('\u{189}', '\u{18b}'),
-        ('\u{18e}', '\u{191}'), ('\u{193}', '\u{194}'), ('\u{196}', '\u{198}'),
-        ('\u{19c}', '\u{19d}'), ('\u{19f}', '\u{1a0}'), ('\u{1a2}', '\u{1a2}'),
-        ('\u{1a4}', '\u{1a4}'), ('\u{1a6}', '\u{1a7}'), ('\u{1a9}', '\u{1a9}'),
-        ('\u{1ac}', '\u{1ac}'), ('\u{1ae}', '\u{1af}'), ('\u{1b1}', '\u{1b3}'),
-        ('\u{1b5}', '\u{1b5}'), ('\u{1b7}', '\u{1b8}'), ('\u{1bc}', '\u{1bc}'),
-        ('\u{1c4}', '\u{1c4}'), ('\u{1c7}', '\u{1c7}'), ('\u{1ca}', '\u{1ca}'),
-        ('\u{1cd}', '\u{1cd}'), ('\u{1cf}', '\u{1cf}'), ('\u{1d1}', '\u{1d1}'),
-        ('\u{1d3}', '\u{1d3}'), ('\u{1d5}', '\u{1d5}'), ('\u{1d7}', '\u{1d7}'),
-        ('\u{1d9}', '\u{1d9}'), ('\u{1db}', '\u{1db}'), ('\u{1de}', '\u{1de}'),
-        ('\u{1e0}', '\u{1e0}'), ('\u{1e2}', '\u{1e2}'), ('\u{1e4}', '\u{1e4}'),
-        ('\u{1e6}', '\u{1e6}'), ('\u{1e8}', '\u{1e8}'), ('\u{1ea}', '\u{1ea}'),
-        ('\u{1ec}', '\u{1ec}'), ('\u{1ee}', '\u{1ee}'), ('\u{1f1}', '\u{1f1}'),
-        ('\u{1f4}', '\u{1f4}'), ('\u{1f6}', '\u{1f8}'), ('\u{1fa}', '\u{1fa}'),
-        ('\u{1fc}', '\u{1fc}'), ('\u{1fe}', '\u{1fe}'), ('\u{200}', '\u{200}'),
-        ('\u{202}', '\u{202}'), ('\u{204}', '\u{204}'), ('\u{206}', '\u{206}'),
-        ('\u{208}', '\u{208}'), ('\u{20a}', '\u{20a}'), ('\u{20c}', '\u{20c}'),
-        ('\u{20e}', '\u{20e}'), ('\u{210}', '\u{210}'), ('\u{212}', '\u{212}'),
-        ('\u{214}', '\u{214}'), ('\u{216}', '\u{216}'), ('\u{218}', '\u{218}'),
-        ('\u{21a}', '\u{21a}'), ('\u{21c}', '\u{21c}'), ('\u{21e}', '\u{21e}'),
-        ('\u{220}', '\u{220}'), ('\u{222}', '\u{222}'), ('\u{224}', '\u{224}'),
-        ('\u{226}', '\u{226}'), ('\u{228}', '\u{228}'), ('\u{22a}', '\u{22a}'),
-        ('\u{22c}', '\u{22c}'), ('\u{22e}', '\u{22e}'), ('\u{230}', '\u{230}'),
-        ('\u{232}', '\u{232}'), ('\u{23a}', '\u{23b}'), ('\u{23d}', '\u{23e}'),
-        ('\u{241}', '\u{241}'), ('\u{243}', '\u{246}'), ('\u{248}', '\u{248}'),
-        ('\u{24a}', '\u{24a}'), ('\u{24c}', '\u{24c}'), ('\u{24e}', '\u{24e}'),
-        ('\u{370}', '\u{370}'), ('\u{372}', '\u{372}'), ('\u{376}', '\u{376}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{38f}'), ('\u{391}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3ab}'), ('\u{3cf}', '\u{3cf}'), ('\u{3d2}', '\u{3d4}'),
-        ('\u{3d8}', '\u{3d8}'), ('\u{3da}', '\u{3da}'), ('\u{3dc}', '\u{3dc}'),
-        ('\u{3de}', '\u{3de}'), ('\u{3e0}', '\u{3e0}'), ('\u{3e2}', '\u{3e2}'),
-        ('\u{3e4}', '\u{3e4}'), ('\u{3e6}', '\u{3e6}'), ('\u{3e8}', '\u{3e8}'),
-        ('\u{3ea}', '\u{3ea}'), ('\u{3ec}', '\u{3ec}'), ('\u{3ee}', '\u{3ee}'),
-        ('\u{3f4}', '\u{3f4}'), ('\u{3f7}', '\u{3f7}'), ('\u{3f9}', '\u{3fa}'),
-        ('\u{3fd}', '\u{42f}'), ('\u{460}', '\u{460}'), ('\u{462}', '\u{462}'),
-        ('\u{464}', '\u{464}'), ('\u{466}', '\u{466}'), ('\u{468}', '\u{468}'),
-        ('\u{46a}', '\u{46a}'), ('\u{46c}', '\u{46c}'), ('\u{46e}', '\u{46e}'),
-        ('\u{470}', '\u{470}'), ('\u{472}', '\u{472}'), ('\u{474}', '\u{474}'),
-        ('\u{476}', '\u{476}'), ('\u{478}', '\u{478}'), ('\u{47a}', '\u{47a}'),
-        ('\u{47c}', '\u{47c}'), ('\u{47e}', '\u{47e}'), ('\u{480}', '\u{480}'),
-        ('\u{48a}', '\u{48a}'), ('\u{48c}', '\u{48c}'), ('\u{48e}', '\u{48e}'),
-        ('\u{490}', '\u{490}'), ('\u{492}', '\u{492}'), ('\u{494}', '\u{494}'),
-        ('\u{496}', '\u{496}'), ('\u{498}', '\u{498}'), ('\u{49a}', '\u{49a}'),
-        ('\u{49c}', '\u{49c}'), ('\u{49e}', '\u{49e}'), ('\u{4a0}', '\u{4a0}'),
-        ('\u{4a2}', '\u{4a2}'), ('\u{4a4}', '\u{4a4}'), ('\u{4a6}', '\u{4a6}'),
-        ('\u{4a8}', '\u{4a8}'), ('\u{4aa}', '\u{4aa}'), ('\u{4ac}', '\u{4ac}'),
-        ('\u{4ae}', '\u{4ae}'), ('\u{4b0}', '\u{4b0}'), ('\u{4b2}', '\u{4b2}'),
-        ('\u{4b4}', '\u{4b4}'), ('\u{4b6}', '\u{4b6}'), ('\u{4b8}', '\u{4b8}'),
-        ('\u{4ba}', '\u{4ba}'), ('\u{4bc}', '\u{4bc}'), ('\u{4be}', '\u{4be}'),
-        ('\u{4c0}', '\u{4c1}'), ('\u{4c3}', '\u{4c3}'), ('\u{4c5}', '\u{4c5}'),
-        ('\u{4c7}', '\u{4c7}'), ('\u{4c9}', '\u{4c9}'), ('\u{4cb}', '\u{4cb}'),
-        ('\u{4cd}', '\u{4cd}'), ('\u{4d0}', '\u{4d0}'), ('\u{4d2}', '\u{4d2}'),
-        ('\u{4d4}', '\u{4d4}'), ('\u{4d6}', '\u{4d6}'), ('\u{4d8}', '\u{4d8}'),
-        ('\u{4da}', '\u{4da}'), ('\u{4dc}', '\u{4dc}'), ('\u{4de}', '\u{4de}'),
-        ('\u{4e0}', '\u{4e0}'), ('\u{4e2}', '\u{4e2}'), ('\u{4e4}', '\u{4e4}'),
-        ('\u{4e6}', '\u{4e6}'), ('\u{4e8}', '\u{4e8}'), ('\u{4ea}', '\u{4ea}'),
-        ('\u{4ec}', '\u{4ec}'), ('\u{4ee}', '\u{4ee}'), ('\u{4f0}', '\u{4f0}'),
-        ('\u{4f2}', '\u{4f2}'), ('\u{4f4}', '\u{4f4}'), ('\u{4f6}', '\u{4f6}'),
-        ('\u{4f8}', '\u{4f8}'), ('\u{4fa}', '\u{4fa}'), ('\u{4fc}', '\u{4fc}'),
-        ('\u{4fe}', '\u{4fe}'), ('\u{500}', '\u{500}'), ('\u{502}', '\u{502}'),
-        ('\u{504}', '\u{504}'), ('\u{506}', '\u{506}'), ('\u{508}', '\u{508}'),
-        ('\u{50a}', '\u{50a}'), ('\u{50c}', '\u{50c}'), ('\u{50e}', '\u{50e}'),
-        ('\u{510}', '\u{510}'), ('\u{512}', '\u{512}'), ('\u{514}', '\u{514}'),
-        ('\u{516}', '\u{516}'), ('\u{518}', '\u{518}'), ('\u{51a}', '\u{51a}'),
-        ('\u{51c}', '\u{51c}'), ('\u{51e}', '\u{51e}'), ('\u{520}', '\u{520}'),
-        ('\u{522}', '\u{522}'), ('\u{524}', '\u{524}'), ('\u{526}', '\u{526}'),
-        ('\u{528}', '\u{528}'), ('\u{52a}', '\u{52a}'), ('\u{52c}', '\u{52c}'),
-        ('\u{52e}', '\u{52e}'), ('\u{531}', '\u{556}'), ('\u{10a0}',
-        '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{1e00}', '\u{1e00}'), ('\u{1e02}',
-        '\u{1e02}'), ('\u{1e04}', '\u{1e04}'), ('\u{1e06}', '\u{1e06}'),
-        ('\u{1e08}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0a}'), ('\u{1e0c}',
-        '\u{1e0c}'), ('\u{1e0e}', '\u{1e0e}'), ('\u{1e10}', '\u{1e10}'),
-        ('\u{1e12}', '\u{1e12}'), ('\u{1e14}', '\u{1e14}'), ('\u{1e16}',
-        '\u{1e16}'), ('\u{1e18}', '\u{1e18}'), ('\u{1e1a}', '\u{1e1a}'),
-        ('\u{1e1c}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1e}'), ('\u{1e20}',
-        '\u{1e20}'), ('\u{1e22}', '\u{1e22}'), ('\u{1e24}', '\u{1e24}'),
-        ('\u{1e26}', '\u{1e26}'), ('\u{1e28}', '\u{1e28}'), ('\u{1e2a}',
-        '\u{1e2a}'), ('\u{1e2c}', '\u{1e2c}'), ('\u{1e2e}', '\u{1e2e}'),
-        ('\u{1e30}', '\u{1e30}'), ('\u{1e32}', '\u{1e32}'), ('\u{1e34}',
-        '\u{1e34}'), ('\u{1e36}', '\u{1e36}'), ('\u{1e38}', '\u{1e38}'),
-        ('\u{1e3a}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3c}'), ('\u{1e3e}',
-        '\u{1e3e}'), ('\u{1e40}', '\u{1e40}'), ('\u{1e42}', '\u{1e42}'),
-        ('\u{1e44}', '\u{1e44}'), ('\u{1e46}', '\u{1e46}'), ('\u{1e48}',
-        '\u{1e48}'), ('\u{1e4a}', '\u{1e4a}'), ('\u{1e4c}', '\u{1e4c}'),
-        ('\u{1e4e}', '\u{1e4e}'), ('\u{1e50}', '\u{1e50}'), ('\u{1e52}',
-        '\u{1e52}'), ('\u{1e54}', '\u{1e54}'), ('\u{1e56}', '\u{1e56}'),
-        ('\u{1e58}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5a}'), ('\u{1e5c}',
-        '\u{1e5c}'), ('\u{1e5e}', '\u{1e5e}'), ('\u{1e60}', '\u{1e60}'),
-        ('\u{1e62}', '\u{1e62}'), ('\u{1e64}', '\u{1e64}'), ('\u{1e66}',
-        '\u{1e66}'), ('\u{1e68}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6a}'),
-        ('\u{1e6c}', '\u{1e6c}'), ('\u{1e6e}', '\u{1e6e}'), ('\u{1e70}',
-        '\u{1e70}'), ('\u{1e72}', '\u{1e72}'), ('\u{1e74}', '\u{1e74}'),
-        ('\u{1e76}', '\u{1e76}'), ('\u{1e78}', '\u{1e78}'), ('\u{1e7a}',
-        '\u{1e7a}'), ('\u{1e7c}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7e}'),
-        ('\u{1e80}', '\u{1e80}'), ('\u{1e82}', '\u{1e82}'), ('\u{1e84}',
-        '\u{1e84}'), ('\u{1e86}', '\u{1e86}'), ('\u{1e88}', '\u{1e88}'),
-        ('\u{1e8a}', '\u{1e8a}'), ('\u{1e8c}', '\u{1e8c}'), ('\u{1e8e}',
-        '\u{1e8e}'), ('\u{1e90}', '\u{1e90}'), ('\u{1e92}', '\u{1e92}'),
-        ('\u{1e94}', '\u{1e94}'), ('\u{1e9e}', '\u{1e9e}'), ('\u{1ea0}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea2}'), ('\u{1ea4}', '\u{1ea4}'),
-        ('\u{1ea6}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea8}'), ('\u{1eaa}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1eac}'), ('\u{1eae}', '\u{1eae}'),
-        ('\u{1eb0}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb2}'), ('\u{1eb4}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb6}'), ('\u{1eb8}', '\u{1eb8}'),
-        ('\u{1eba}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebc}'), ('\u{1ebe}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec0}'), ('\u{1ec2}', '\u{1ec2}'),
-        ('\u{1ec4}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec6}'), ('\u{1ec8}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1eca}'), ('\u{1ecc}', '\u{1ecc}'),
-        ('\u{1ece}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed0}'), ('\u{1ed2}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed4}'), ('\u{1ed6}', '\u{1ed6}'),
-        ('\u{1ed8}', '\u{1ed8}'), ('\u{1eda}', '\u{1eda}'), ('\u{1edc}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1ede}'), ('\u{1ee0}', '\u{1ee0}'),
-        ('\u{1ee2}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee4}'), ('\u{1ee6}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee8}'), ('\u{1eea}', '\u{1eea}'),
-        ('\u{1eec}', '\u{1eec}'), ('\u{1eee}', '\u{1eee}'), ('\u{1ef0}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef2}'), ('\u{1ef4}', '\u{1ef4}'),
-        ('\u{1ef6}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef8}'), ('\u{1efa}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efc}'), ('\u{1efe}', '\u{1efe}'),
-        ('\u{1f08}', '\u{1f0f}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f28}',
-        '\u{1f2f}'), ('\u{1f38}', '\u{1f3f}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}',
-        '\u{1f5d}'), ('\u{1f5f}', '\u{1f5f}'), ('\u{1f68}', '\u{1f6f}'),
-        ('\u{1fb8}', '\u{1fbb}'), ('\u{1fc8}', '\u{1fcb}'), ('\u{1fd8}',
-        '\u{1fdb}'), ('\u{1fe8}', '\u{1fec}'), ('\u{1ff8}', '\u{1ffb}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210b}',
-        '\u{210d}'), ('\u{2110}', '\u{2112}'), ('\u{2115}', '\u{2115}'),
-        ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}',
-        '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'),
-        ('\u{2130}', '\u{2133}'), ('\u{213e}', '\u{213f}'), ('\u{2145}',
-        '\u{2145}'), ('\u{2160}', '\u{216f}'), ('\u{2183}', '\u{2183}'),
-        ('\u{24b6}', '\u{24cf}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c60}',
-        '\u{2c60}'), ('\u{2c62}', '\u{2c64}'), ('\u{2c67}', '\u{2c67}'),
-        ('\u{2c69}', '\u{2c69}'), ('\u{2c6b}', '\u{2c6b}'), ('\u{2c6d}',
-        '\u{2c70}'), ('\u{2c72}', '\u{2c72}'), ('\u{2c75}', '\u{2c75}'),
-        ('\u{2c7e}', '\u{2c80}'), ('\u{2c82}', '\u{2c82}'), ('\u{2c84}',
-        '\u{2c84}'), ('\u{2c86}', '\u{2c86}'), ('\u{2c88}', '\u{2c88}'),
-        ('\u{2c8a}', '\u{2c8a}'), ('\u{2c8c}', '\u{2c8c}'), ('\u{2c8e}',
-        '\u{2c8e}'), ('\u{2c90}', '\u{2c90}'), ('\u{2c92}', '\u{2c92}'),
-        ('\u{2c94}', '\u{2c94}'), ('\u{2c96}', '\u{2c96}'), ('\u{2c98}',
-        '\u{2c98}'), ('\u{2c9a}', '\u{2c9a}'), ('\u{2c9c}', '\u{2c9c}'),
-        ('\u{2c9e}', '\u{2c9e}'), ('\u{2ca0}', '\u{2ca0}'), ('\u{2ca2}',
-        '\u{2ca2}'), ('\u{2ca4}', '\u{2ca4}'), ('\u{2ca6}', '\u{2ca6}'),
-        ('\u{2ca8}', '\u{2ca8}'), ('\u{2caa}', '\u{2caa}'), ('\u{2cac}',
-        '\u{2cac}'), ('\u{2cae}', '\u{2cae}'), ('\u{2cb0}', '\u{2cb0}'),
-        ('\u{2cb2}', '\u{2cb2}'), ('\u{2cb4}', '\u{2cb4}'), ('\u{2cb6}',
-        '\u{2cb6}'), ('\u{2cb8}', '\u{2cb8}'), ('\u{2cba}', '\u{2cba}'),
-        ('\u{2cbc}', '\u{2cbc}'), ('\u{2cbe}', '\u{2cbe}'), ('\u{2cc0}',
-        '\u{2cc0}'), ('\u{2cc2}', '\u{2cc2}'), ('\u{2cc4}', '\u{2cc4}'),
-        ('\u{2cc6}', '\u{2cc6}'), ('\u{2cc8}', '\u{2cc8}'), ('\u{2cca}',
-        '\u{2cca}'), ('\u{2ccc}', '\u{2ccc}'), ('\u{2cce}', '\u{2cce}'),
-        ('\u{2cd0}', '\u{2cd0}'), ('\u{2cd2}', '\u{2cd2}'), ('\u{2cd4}',
-        '\u{2cd4}'), ('\u{2cd6}', '\u{2cd6}'), ('\u{2cd8}', '\u{2cd8}'),
-        ('\u{2cda}', '\u{2cda}'), ('\u{2cdc}', '\u{2cdc}'), ('\u{2cde}',
-        '\u{2cde}'), ('\u{2ce0}', '\u{2ce0}'), ('\u{2ce2}', '\u{2ce2}'),
-        ('\u{2ceb}', '\u{2ceb}'), ('\u{2ced}', '\u{2ced}'), ('\u{2cf2}',
-        '\u{2cf2}'), ('\u{a640}', '\u{a640}'), ('\u{a642}', '\u{a642}'),
-        ('\u{a644}', '\u{a644}'), ('\u{a646}', '\u{a646}'), ('\u{a648}',
-        '\u{a648}'), ('\u{a64a}', '\u{a64a}'), ('\u{a64c}', '\u{a64c}'),
-        ('\u{a64e}', '\u{a64e}'), ('\u{a650}', '\u{a650}'), ('\u{a652}',
-        '\u{a652}'), ('\u{a654}', '\u{a654}'), ('\u{a656}', '\u{a656}'),
-        ('\u{a658}', '\u{a658}'), ('\u{a65a}', '\u{a65a}'), ('\u{a65c}',
-        '\u{a65c}'), ('\u{a65e}', '\u{a65e}'), ('\u{a660}', '\u{a660}'),
-        ('\u{a662}', '\u{a662}'), ('\u{a664}', '\u{a664}'), ('\u{a666}',
-        '\u{a666}'), ('\u{a668}', '\u{a668}'), ('\u{a66a}', '\u{a66a}'),
-        ('\u{a66c}', '\u{a66c}'), ('\u{a680}', '\u{a680}'), ('\u{a682}',
-        '\u{a682}'), ('\u{a684}', '\u{a684}'), ('\u{a686}', '\u{a686}'),
-        ('\u{a688}', '\u{a688}'), ('\u{a68a}', '\u{a68a}'), ('\u{a68c}',
-        '\u{a68c}'), ('\u{a68e}', '\u{a68e}'), ('\u{a690}', '\u{a690}'),
-        ('\u{a692}', '\u{a692}'), ('\u{a694}', '\u{a694}'), ('\u{a696}',
-        '\u{a696}'), ('\u{a698}', '\u{a698}'), ('\u{a69a}', '\u{a69a}'),
-        ('\u{a722}', '\u{a722}'), ('\u{a724}', '\u{a724}'), ('\u{a726}',
-        '\u{a726}'), ('\u{a728}', '\u{a728}'), ('\u{a72a}', '\u{a72a}'),
-        ('\u{a72c}', '\u{a72c}'), ('\u{a72e}', '\u{a72e}'), ('\u{a732}',
-        '\u{a732}'), ('\u{a734}', '\u{a734}'), ('\u{a736}', '\u{a736}'),
-        ('\u{a738}', '\u{a738}'), ('\u{a73a}', '\u{a73a}'), ('\u{a73c}',
-        '\u{a73c}'), ('\u{a73e}', '\u{a73e}'), ('\u{a740}', '\u{a740}'),
-        ('\u{a742}', '\u{a742}'), ('\u{a744}', '\u{a744}'), ('\u{a746}',
-        '\u{a746}'), ('\u{a748}', '\u{a748}'), ('\u{a74a}', '\u{a74a}'),
-        ('\u{a74c}', '\u{a74c}'), ('\u{a74e}', '\u{a74e}'), ('\u{a750}',
-        '\u{a750}'), ('\u{a752}', '\u{a752}'), ('\u{a754}', '\u{a754}'),
-        ('\u{a756}', '\u{a756}'), ('\u{a758}', '\u{a758}'), ('\u{a75a}',
-        '\u{a75a}'), ('\u{a75c}', '\u{a75c}'), ('\u{a75e}', '\u{a75e}'),
-        ('\u{a760}', '\u{a760}'), ('\u{a762}', '\u{a762}'), ('\u{a764}',
-        '\u{a764}'), ('\u{a766}', '\u{a766}'), ('\u{a768}', '\u{a768}'),
-        ('\u{a76a}', '\u{a76a}'), ('\u{a76c}', '\u{a76c}'), ('\u{a76e}',
-        '\u{a76e}'), ('\u{a779}', '\u{a779}'), ('\u{a77b}', '\u{a77b}'),
-        ('\u{a77d}', '\u{a77e}'), ('\u{a780}', '\u{a780}'), ('\u{a782}',
-        '\u{a782}'), ('\u{a784}', '\u{a784}'), ('\u{a786}', '\u{a786}'),
-        ('\u{a78b}', '\u{a78b}'), ('\u{a78d}', '\u{a78d}'), ('\u{a790}',
-        '\u{a790}'), ('\u{a792}', '\u{a792}'), ('\u{a796}', '\u{a796}'),
-        ('\u{a798}', '\u{a798}'), ('\u{a79a}', '\u{a79a}'), ('\u{a79c}',
-        '\u{a79c}'), ('\u{a79e}', '\u{a79e}'), ('\u{a7a0}', '\u{a7a0}'),
-        ('\u{a7a2}', '\u{a7a2}'), ('\u{a7a4}', '\u{a7a4}'), ('\u{a7a6}',
-        '\u{a7a6}'), ('\u{a7a8}', '\u{a7a8}'), ('\u{a7aa}', '\u{a7ae}'),
-        ('\u{a7b0}', '\u{a7b4}'), ('\u{a7b6}', '\u{a7b6}'), ('\u{ff21}',
-        '\u{ff3a}'), ('\u{10400}', '\u{10427}'), ('\u{104b0}', '\u{104d3}'),
-        ('\u{10c80}', '\u{10cb2}'), ('\u{118a0}', '\u{118bf}'), ('\u{1d400}',
-        '\u{1d419}'), ('\u{1d434}', '\u{1d44d}'), ('\u{1d468}', '\u{1d481}'),
-        ('\u{1d49c}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b5}'), ('\u{1d4d0}', '\u{1d4e9}'), ('\u{1d504}',
-        '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'),
-        ('\u{1d516}', '\u{1d51c}'), ('\u{1d538}', '\u{1d539}'), ('\u{1d53b}',
-        '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', '\u{1d546}'),
-        ('\u{1d54a}', '\u{1d550}'), ('\u{1d56c}', '\u{1d585}'), ('\u{1d5a0}',
-        '\u{1d5b9}'), ('\u{1d5d4}', '\u{1d5ed}'), ('\u{1d608}', '\u{1d621}'),
-        ('\u{1d63c}', '\u{1d655}'), ('\u{1d670}', '\u{1d689}'), ('\u{1d6a8}',
-        '\u{1d6c0}'), ('\u{1d6e2}', '\u{1d6fa}'), ('\u{1d71c}', '\u{1d734}'),
-        ('\u{1d756}', '\u{1d76e}'), ('\u{1d790}', '\u{1d7a8}'), ('\u{1d7ca}',
-        '\u{1d7ca}'), ('\u{1e900}', '\u{1e921}'), ('\u{1f130}', '\u{1f149}'),
-        ('\u{1f150}', '\u{1f169}'), ('\u{1f170}', '\u{1f189}')
-    ];
-
-    pub const XID_Continue_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{41}', '\u{5a}'), ('\u{5f}', '\u{5f}'),
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{b7}', '\u{b7}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{300}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{38a}'), ('\u{38c}', '\u{38c}'),
-        ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'),
-        ('\u{483}', '\u{487}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f2}'),
-        ('\u{610}', '\u{61a}'), ('\u{620}', '\u{669}'), ('\u{66e}', '\u{6d3}'),
-        ('\u{6d5}', '\u{6dc}'), ('\u{6df}', '\u{6e8}'), ('\u{6ea}', '\u{6fc}'),
-        ('\u{6ff}', '\u{6ff}'), ('\u{710}', '\u{74a}'), ('\u{74d}', '\u{7b1}'),
-        ('\u{7c0}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{82d}'),
-        ('\u{840}', '\u{85b}'), ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'),
-        ('\u{8b6}', '\u{8bd}'), ('\u{8d4}', '\u{8e1}'), ('\u{8e3}', '\u{963}'),
-        ('\u{966}', '\u{96f}'), ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'),
-        ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'),
-        ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'),
-        ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'),
-        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9f1}'),
-        ('\u{9fc}', '\u{9fc}'), ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a3c}', '\u{a3c}'), ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'),
-        ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'),
-        ('\u{a5e}', '\u{a5e}'), ('\u{a66}', '\u{a75}'), ('\u{a81}', '\u{a83}'),
-        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'),
-        ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
-        ('\u{abc}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acd}'),
-        ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae3}'), ('\u{ae6}', '\u{aef}'),
-        ('\u{af9}', '\u{aff}'), ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'),
-        ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'),
-        ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'),
-        ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'),
-        ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b6f}'),
-        ('\u{b71}', '\u{b71}'), ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'),
-        ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'),
-        ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'),
-        ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'),
-        ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'),
-        ('\u{bd7}', '\u{bd7}'), ('\u{be6}', '\u{bef}'), ('\u{c00}', '\u{c03}'),
-        ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'),
-        ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'), ('\u{c46}', '\u{c48}'),
-        ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('\u{c58}', '\u{c5a}'),
-        ('\u{c60}', '\u{c63}'), ('\u{c66}', '\u{c6f}'), ('\u{c80}', '\u{c83}'),
-        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'),
-        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbc}', '\u{cc4}'),
-        ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'),
-        ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'), ('\u{ce6}', '\u{cef}'),
-        ('\u{cf1}', '\u{cf2}'), ('\u{d00}', '\u{d03}'), ('\u{d05}', '\u{d0c}'),
-        ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d44}'), ('\u{d46}', '\u{d48}'),
-        ('\u{d4a}', '\u{d4e}'), ('\u{d54}', '\u{d57}'), ('\u{d5f}', '\u{d63}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{dca}', '\u{dca}'),
-        ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'),
-        ('\u{de6}', '\u{def}'), ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'),
-        ('\u{e40}', '\u{e4e}'), ('\u{e50}', '\u{e59}'), ('\u{e81}', '\u{e82}'),
-        ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'),
-        ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'),
-        ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'),
-        ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb9}'), ('\u{ebb}', '\u{ebd}'),
-        ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'), ('\u{ec8}', '\u{ecd}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'),
-        ('\u{f18}', '\u{f19}'), ('\u{f20}', '\u{f29}'), ('\u{f35}', '\u{f35}'),
-        ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f47}'),
-        ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('\u{1000}',
-        '\u{1049}'), ('\u{1050}', '\u{109d}'), ('\u{10a0}', '\u{10c5}'),
-        ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}',
-        '\u{10fa}'), ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'),
-        ('\u{1250}', '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}',
-        '\u{125d}'), ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'),
-        ('\u{1290}', '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}',
-        '\u{12be}'), ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'),
-        ('\u{12c8}', '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}',
-        '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{135d}', '\u{135f}'),
-        ('\u{1369}', '\u{1371}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}',
-        '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1714}'), ('\u{1720}', '\u{1734}'), ('\u{1740}',
-        '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17d3}'), ('\u{17d7}',
-        '\u{17d7}'), ('\u{17dc}', '\u{17dd}'), ('\u{17e0}', '\u{17e9}'),
-        ('\u{180b}', '\u{180d}'), ('\u{1810}', '\u{1819}'), ('\u{1820}',
-        '\u{1877}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'),
-        ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'), ('\u{1930}',
-        '\u{193b}'), ('\u{1946}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{19d0}',
-        '\u{19da}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'),
-        ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a89}'), ('\u{1a90}',
-        '\u{1a99}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1ab0}', '\u{1abd}'),
-        ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b59}'), ('\u{1b6b}',
-        '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('\u{1c00}', '\u{1c37}'),
-        ('\u{1c40}', '\u{1c49}'), ('\u{1c4d}', '\u{1c7d}'), ('\u{1c80}',
-        '\u{1c88}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1cf9}'),
-        ('\u{1d00}', '\u{1df9}'), ('\u{1dfb}', '\u{1f15}'), ('\u{1f18}',
-        '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}',
-        '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}',
-        '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'),
-        ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}',
-        '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'),
-        ('\u{203f}', '\u{2040}'), ('\u{2054}', '\u{2054}'), ('\u{2071}',
-        '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'),
-        ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'), ('\u{20e5}',
-        '\u{20f0}'), ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'),
-        ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2118}',
-        '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'),
-        ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{2139}'), ('\u{213c}',
-        '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}', '\u{214e}'),
-        ('\u{2160}', '\u{2188}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}',
-        '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'),
-        ('\u{2d7f}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}',
-        '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'),
-        ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}',
-        '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{302f}'), ('\u{3031}',
-        '\u{3035}'), ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'),
-        ('\u{3099}', '\u{309a}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312e}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fea}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a62b}'), ('\u{a640}', '\u{a66f}'),
-        ('\u{a674}', '\u{a67d}'), ('\u{a67f}', '\u{a6f1}'), ('\u{a717}',
-        '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ae}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a827}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a880}', '\u{a8c5}'), ('\u{a8d0}', '\u{a8d9}'),
-        ('\u{a8e0}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}',
-        '\u{a8fd}'), ('\u{a900}', '\u{a92d}'), ('\u{a930}', '\u{a953}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{a980}', '\u{a9c0}'), ('\u{a9cf}',
-        '\u{a9d9}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'),
-        ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}', '\u{aa59}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'),
-        ('\u{aae0}', '\u{aaef}'), ('\u{aaf2}', '\u{aaf6}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abea}'),
-        ('\u{abec}', '\u{abed}'), ('\u{abf0}', '\u{abf9}'), ('\u{ac00}',
-        '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'),
-        ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}',
-        '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'),
-        ('\u{fdf0}', '\u{fdf9}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}',
-        '\u{fe2f}'), ('\u{fe33}', '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'),
-        ('\u{fe71}', '\u{fe71}'), ('\u{fe73}', '\u{fe73}'), ('\u{fe77}',
-        '\u{fe77}'), ('\u{fe79}', '\u{fe79}'), ('\u{fe7b}', '\u{fe7b}'),
-        ('\u{fe7d}', '\u{fe7d}'), ('\u{fe7f}', '\u{fefc}'), ('\u{ff10}',
-        '\u{ff19}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff3f}', '\u{ff3f}'),
-        ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{101fd}', '\u{101fd}'),
-        ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), ('\u{102e0}',
-        '\u{102e0}'), ('\u{10300}', '\u{1031f}'), ('\u{1032d}', '\u{1034a}'),
-        ('\u{10350}', '\u{1037a}'), ('\u{10380}', '\u{1039d}'), ('\u{103a0}',
-        '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}', '\u{103d5}'),
-        ('\u{10400}', '\u{1049d}'), ('\u{104a0}', '\u{104a9}'), ('\u{104b0}',
-        '\u{104d3}'), ('\u{104d8}', '\u{104fb}'), ('\u{10500}', '\u{10527}'),
-        ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}',
-        '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'),
-        ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}',
-        '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'),
-        ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}',
-        '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'),
-        ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}',
-        '\u{109bf}'), ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
-        ('\u{10a0c}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}',
-        '\u{10a33}'), ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'),
-        ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}',
-        '\u{10ac7}'), ('\u{10ac9}', '\u{10ae6}'), ('\u{10b00}', '\u{10b35}'),
-        ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}',
-        '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'),
-        ('\u{10cc0}', '\u{10cf2}'), ('\u{11000}', '\u{11046}'), ('\u{11066}',
-        '\u{1106f}'), ('\u{1107f}', '\u{110ba}'), ('\u{110d0}', '\u{110e8}'),
-        ('\u{110f0}', '\u{110f9}'), ('\u{11100}', '\u{11134}'), ('\u{11136}',
-        '\u{1113f}'), ('\u{11150}', '\u{11173}'), ('\u{11176}', '\u{11176}'),
-        ('\u{11180}', '\u{111c4}'), ('\u{111ca}', '\u{111cc}'), ('\u{111d0}',
-        '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'),
-        ('\u{11213}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11280}',
-        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'),
-        ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}',
-        '\u{112ea}'), ('\u{112f0}', '\u{112f9}'), ('\u{11300}', '\u{11303}'),
-        ('\u{11305}', '\u{1130c}'), ('\u{1130f}', '\u{11310}'), ('\u{11313}',
-        '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}', '\u{11333}'),
-        ('\u{11335}', '\u{11339}'), ('\u{1133c}', '\u{11344}'), ('\u{11347}',
-        '\u{11348}'), ('\u{1134b}', '\u{1134d}'), ('\u{11350}', '\u{11350}'),
-        ('\u{11357}', '\u{11357}'), ('\u{1135d}', '\u{11363}'), ('\u{11366}',
-        '\u{1136c}'), ('\u{11370}', '\u{11374}'), ('\u{11400}', '\u{1144a}'),
-        ('\u{11450}', '\u{11459}'), ('\u{11480}', '\u{114c5}'), ('\u{114c7}',
-        '\u{114c7}'), ('\u{114d0}', '\u{114d9}'), ('\u{11580}', '\u{115b5}'),
-        ('\u{115b8}', '\u{115c0}'), ('\u{115d8}', '\u{115dd}'), ('\u{11600}',
-        '\u{11640}'), ('\u{11644}', '\u{11644}'), ('\u{11650}', '\u{11659}'),
-        ('\u{11680}', '\u{116b7}'), ('\u{116c0}', '\u{116c9}'), ('\u{11700}',
-        '\u{11719}'), ('\u{1171d}', '\u{1172b}'), ('\u{11730}', '\u{11739}'),
-        ('\u{118a0}', '\u{118e9}'), ('\u{118ff}', '\u{118ff}'), ('\u{11a00}',
-        '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'), ('\u{11a50}', '\u{11a83}'),
-        ('\u{11a86}', '\u{11a99}'), ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}',
-        '\u{11c08}'), ('\u{11c0a}', '\u{11c36}'), ('\u{11c38}', '\u{11c40}'),
-        ('\u{11c50}', '\u{11c59}'), ('\u{11c72}', '\u{11c8f}'), ('\u{11c92}',
-        '\u{11ca7}'), ('\u{11ca9}', '\u{11cb6}'), ('\u{11d00}', '\u{11d06}'),
-        ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}', '\u{11d36}'), ('\u{11d3a}',
-        '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', '\u{11d47}'),
-        ('\u{11d50}', '\u{11d59}'), ('\u{12000}', '\u{12399}'), ('\u{12400}',
-        '\u{1246e}'), ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'),
-        ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}',
-        '\u{16a5e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16ad0}', '\u{16aed}'),
-        ('\u{16af0}', '\u{16af4}'), ('\u{16b00}', '\u{16b36}'), ('\u{16b40}',
-        '\u{16b43}'), ('\u{16b50}', '\u{16b59}'), ('\u{16b63}', '\u{16b77}'),
-        ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}',
-        '\u{16f7e}'), ('\u{16f8f}', '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'),
-        ('\u{17000}', '\u{187ec}'), ('\u{18800}', '\u{18af2}'), ('\u{1b000}',
-        '\u{1b11e}'), ('\u{1b170}', '\u{1b2fb}'), ('\u{1bc00}', '\u{1bc6a}'),
-        ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}',
-        '\u{1bc99}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'),
-        ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}',
-        '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'),
-        ('\u{1d400}', '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}',
-        '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'),
-        ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}',
-        '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'),
-        ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}',
-        '\u{1d51c}'), ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'),
-        ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}',
-        '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'),
-        ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}',
-        '\u{1d714}'), ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'),
-        ('\u{1d750}', '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}',
-        '\u{1d7a8}'), ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'),
-        ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}',
-        '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'),
-        ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}',
-        '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
-        ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('\u{1e800}',
-        '\u{1e8c4}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e900}', '\u{1e94a}'),
-        ('\u{1e950}', '\u{1e959}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}',
-        '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'),
-        ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}',
-        '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'),
-        ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}',
-        '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'),
-        ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}',
-        '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'),
-        ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}',
-        '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'),
-        ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}',
-        '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'),
-        ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}',
-        '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'),
-        ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}',
-        '\u{2cea1}'), ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}'),
-        ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const XID_Start_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{64a}'), ('\u{66e}', '\u{66f}'),
-        ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'), ('\u{6e5}', '\u{6e6}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{815}'), ('\u{81a}', '\u{81a}'),
-        ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'), ('\u{840}', '\u{858}'),
-        ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'), ('\u{8b6}', '\u{8bd}'),
-        ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'), ('\u{950}', '\u{950}'),
-        ('\u{958}', '\u{961}'), ('\u{971}', '\u{980}'), ('\u{985}', '\u{98c}'),
-        ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'),
-        ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9bd}'),
-        ('\u{9ce}', '\u{9ce}'), ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e1}'),
-        ('\u{9f0}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a72}', '\u{a74}'),
-        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'),
-        ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
-        ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae1}'),
-        ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'), ('\u{b83}', '\u{b83}'),
-        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'),
-        ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'),
-        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'),
-        ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c3d}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'), ('\u{c80}', '\u{c80}'),
-        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'),
-        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'),
-        ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'),
-        ('\u{d3d}', '\u{d3d}'), ('\u{d4e}', '\u{d4e}'), ('\u{d54}', '\u{d56}'),
-        ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'), ('\u{d85}', '\u{d96}'),
-        ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'),
-        ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'), ('\u{e32}', '\u{e32}'),
-        ('\u{e40}', '\u{e46}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
-        ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'),
-        ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'),
-        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'),
-        ('\u{ead}', '\u{eb0}'), ('\u{eb2}', '\u{eb2}'), ('\u{ebd}', '\u{ebd}'),
-        ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'), ('\u{edc}', '\u{edf}'),
-        ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f88}', '\u{f8c}'), ('\u{1000}', '\u{102a}'), ('\u{103f}',
-        '\u{103f}'), ('\u{1050}', '\u{1055}'), ('\u{105a}', '\u{105d}'),
-        ('\u{1061}', '\u{1061}'), ('\u{1065}', '\u{1066}'), ('\u{106e}',
-        '\u{1070}'), ('\u{1075}', '\u{1081}'), ('\u{108e}', '\u{108e}'),
-        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}',
-        '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{1248}'),
-        ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'), ('\u{1258}',
-        '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'),
-        ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}',
-        '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}', '\u{12c0}'),
-        ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}',
-        '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'),
-        ('\u{1380}', '\u{138f}'), ('\u{13a0}', '\u{13f5}'), ('\u{13f8}',
-        '\u{13fd}'), ('\u{1401}', '\u{166c}'), ('\u{166f}', '\u{167f}'),
-        ('\u{1681}', '\u{169a}'), ('\u{16a0}', '\u{16ea}'), ('\u{16ee}',
-        '\u{16f8}'), ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1711}'),
-        ('\u{1720}', '\u{1731}'), ('\u{1740}', '\u{1751}'), ('\u{1760}',
-        '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1780}', '\u{17b3}'),
-        ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dc}'), ('\u{1820}',
-        '\u{1877}'), ('\u{1880}', '\u{18a8}'), ('\u{18aa}', '\u{18aa}'),
-        ('\u{18b0}', '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1950}',
-        '\u{196d}'), ('\u{1970}', '\u{1974}'), ('\u{1980}', '\u{19ab}'),
-        ('\u{19b0}', '\u{19c9}'), ('\u{1a00}', '\u{1a16}'), ('\u{1a20}',
-        '\u{1a54}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1b05}', '\u{1b33}'),
-        ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}', '\u{1ba0}'), ('\u{1bae}',
-        '\u{1baf}'), ('\u{1bba}', '\u{1be5}'), ('\u{1c00}', '\u{1c23}'),
-        ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}', '\u{1c7d}'), ('\u{1c80}',
-        '\u{1c88}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf1}'),
-        ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1e00}',
-        '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'),
-        ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}',
-        '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'),
-        ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}',
-        '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'),
-        ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}',
-        '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'),
-        ('\u{1ff6}', '\u{1ffc}'), ('\u{2071}', '\u{2071}'), ('\u{207f}',
-        '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{2102}', '\u{2102}'),
-        ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}',
-        '\u{2115}'), ('\u{2118}', '\u{211d}'), ('\u{2124}', '\u{2124}'),
-        ('\u{2126}', '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}',
-        '\u{2139}'), ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'),
-        ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c00}',
-        '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'),
-        ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'), ('\u{2d00}',
-        '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'),
-        ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}',
-        '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'),
-        ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}',
-        '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'),
-        ('\u{2dd8}', '\u{2dde}'), ('\u{3005}', '\u{3007}'), ('\u{3021}',
-        '\u{3029}'), ('\u{3031}', '\u{3035}'), ('\u{3038}', '\u{303c}'),
-        ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312e}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fea}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'),
-        ('\u{a640}', '\u{a66e}'), ('\u{a67f}', '\u{a69d}'), ('\u{a6a0}',
-        '\u{a6ef}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'),
-        ('\u{a78b}', '\u{a7ae}'), ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}',
-        '\u{a801}'), ('\u{a803}', '\u{a805}'), ('\u{a807}', '\u{a80a}'),
-        ('\u{a80c}', '\u{a822}'), ('\u{a840}', '\u{a873}'), ('\u{a882}',
-        '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'),
-        ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}', '\u{a925}'), ('\u{a930}',
-        '\u{a946}'), ('\u{a960}', '\u{a97c}'), ('\u{a984}', '\u{a9b2}'),
-        ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}',
-        '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'),
-        ('\u{aa40}', '\u{aa42}'), ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'), ('\u{aa7e}', '\u{aaaf}'),
-        ('\u{aab1}', '\u{aab1}'), ('\u{aab5}', '\u{aab6}'), ('\u{aab9}',
-        '\u{aabd}'), ('\u{aac0}', '\u{aac0}'), ('\u{aac2}', '\u{aac2}'),
-        ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', '\u{aaea}'), ('\u{aaf2}',
-        '\u{aaf4}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'),
-        ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'), ('\u{ab28}',
-        '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'),
-        ('\u{ab70}', '\u{abe2}'), ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}',
-        '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'),
-        ('\u{fa70}', '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}',
-        '\u{fb17}'), ('\u{fb1d}', '\u{fb1d}'), ('\u{fb1f}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}',
-        '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'),
-        ('\u{fdf0}', '\u{fdf9}'), ('\u{fe71}', '\u{fe71}'), ('\u{fe73}',
-        '\u{fe73}'), ('\u{fe77}', '\u{fe77}'), ('\u{fe79}', '\u{fe79}'),
-        ('\u{fe7b}', '\u{fe7b}'), ('\u{fe7d}', '\u{fe7d}'), ('\u{fe7f}',
-        '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{ff66}', '\u{ff9d}'), ('\u{ffa0}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{10300}', '\u{1031f}'), ('\u{1032d}',
-        '\u{1034a}'), ('\u{10350}', '\u{10375}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{104b0}', '\u{104d3}'),
-        ('\u{104d8}', '\u{104fb}'), ('\u{10500}', '\u{10527}'), ('\u{10530}',
-        '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'),
-        ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}',
-        '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'),
-        ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}',
-        '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'),
-        ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}',
-        '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'),
-        ('\u{10a00}', '\u{10a00}'), ('\u{10a10}', '\u{10a13}'), ('\u{10a15}',
-        '\u{10a17}'), ('\u{10a19}', '\u{10a33}'), ('\u{10a60}', '\u{10a7c}'),
-        ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}',
-        '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'),
-        ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}',
-        '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'),
-        ('\u{11003}', '\u{11037}'), ('\u{11083}', '\u{110af}'), ('\u{110d0}',
-        '\u{110e8}'), ('\u{11103}', '\u{11126}'), ('\u{11150}', '\u{11172}'),
-        ('\u{11176}', '\u{11176}'), ('\u{11183}', '\u{111b2}'), ('\u{111c1}',
-        '\u{111c4}'), ('\u{111da}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'),
-        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1122b}'), ('\u{11280}',
-        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'),
-        ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}',
-        '\u{112de}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}', '\u{11310}'),
-        ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}',
-        '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133d}', '\u{1133d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{1135d}', '\u{11361}'), ('\u{11400}',
-        '\u{11434}'), ('\u{11447}', '\u{1144a}'), ('\u{11480}', '\u{114af}'),
-        ('\u{114c4}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{11580}',
-        '\u{115ae}'), ('\u{115d8}', '\u{115db}'), ('\u{11600}', '\u{1162f}'),
-        ('\u{11644}', '\u{11644}'), ('\u{11680}', '\u{116aa}'), ('\u{11700}',
-        '\u{11719}'), ('\u{118a0}', '\u{118df}'), ('\u{118ff}', '\u{118ff}'),
-        ('\u{11a00}', '\u{11a00}'), ('\u{11a0b}', '\u{11a32}'), ('\u{11a3a}',
-        '\u{11a3a}'), ('\u{11a50}', '\u{11a50}'), ('\u{11a5c}', '\u{11a83}'),
-        ('\u{11a86}', '\u{11a89}'), ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}',
-        '\u{11c08}'), ('\u{11c0a}', '\u{11c2e}'), ('\u{11c40}', '\u{11c40}'),
-        ('\u{11c72}', '\u{11c8f}'), ('\u{11d00}', '\u{11d06}'), ('\u{11d08}',
-        '\u{11d09}'), ('\u{11d0b}', '\u{11d30}'), ('\u{11d46}', '\u{11d46}'),
-        ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}',
-        '\u{12543}'), ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'),
-        ('\u{16800}', '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16ad0}',
-        '\u{16aed}'), ('\u{16b00}', '\u{16b2f}'), ('\u{16b40}', '\u{16b43}'),
-        ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}',
-        '\u{16f44}'), ('\u{16f50}', '\u{16f50}'), ('\u{16f93}', '\u{16f9f}'),
-        ('\u{16fe0}', '\u{16fe1}'), ('\u{17000}', '\u{187ec}'), ('\u{18800}',
-        '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), ('\u{1b170}', '\u{1b2fb}'),
-        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}',
-        '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'),
-        ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}',
-        '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'),
-        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}',
-        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e800}', '\u{1e8c4}'),
-        ('\u{1e900}', '\u{1e943}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}',
-        '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'),
-        ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}',
-        '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'),
-        ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}',
-        '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'),
-        ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}',
-        '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'),
-        ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}',
-        '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'),
-        ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}',
-        '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'),
-        ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}',
-        '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'),
-        ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}',
-        '\u{2cea1}'), ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}')
-    ];
-
+/// Write the UTF-8 encoding of `character` to the beginning of `dst`.
+///
+/// Returns the number of bytes written (1 to 4), or `None`, writing nothing,
+/// when `dst` is too short to hold the whole sequence.
+pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option<usize> {
+    // TODO: Remove this function once we move to at least Rust 1.15, which
+    // provides char::encode_utf8 for us.
+    const CONT_TAG: u8 = 0b1000_0000;
+    const CONT_MASK: u32 = 0x3F;
+
+    let cp = character as u32;
+    // Sequence length, plus the tag and payload mask of the leading byte.
+    let (len, lead_tag, lead_mask) = if cp <= 0x7F {
+        (1, 0b0000_0000u8, 0x7Fu32)
+    } else if cp <= 0x7FF {
+        (2, 0b1100_0000, 0x1F)
+    } else if cp <= 0xFFFF {
+        (3, 0b1110_0000, 0x0F)
+    } else {
+        (4, 0b1111_0000, 0x07)
+    };
+    if dst.len() < len {
+        return None;
+    }
+
+    // Continuation bytes carry six payload bits each; fill them from the
+    // last byte backwards so that `rest` ends up holding the leading bits.
+    let mut rest = cp;
+    for slot in dst[1..len].iter_mut().rev() {
+        *slot = (rest & CONT_MASK) as u8 | CONT_TAG;
+        rest >>= 6;
+    }
+    dst[0] = (rest & lead_mask) as u8 | lead_tag;
+    Some(len)
 }
 
-pub mod script {
-    pub const Adlam_table: &'static [(char, char)] = &[
-        ('\u{1e900}', '\u{1e94a}'), ('\u{1e950}', '\u{1e959}'), ('\u{1e95e}',
-        '\u{1e95f}')
-    ];
-
-    pub const Ahom_table: &'static [(char, char)] = &[
-        ('\u{11700}', '\u{11719}'), ('\u{1171d}', '\u{1172b}'), ('\u{11730}',
-        '\u{1173f}')
-    ];
-
-    pub const Anatolian_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{14400}', '\u{14646}')
-    ];
+/// An iterator over a codepoint's simple case equivalence class.
+///
+/// This is a thin wrapper around a slice iterator over `'static` `char`
+/// data; its `Iterator` implementation yields `char` values.
+#[derive(Debug)]
+pub struct SimpleFoldIter(::std::slice::Iter<'static, char>);
 
-    pub const Arabic_table: &'static [(char, char)] = &[
-        ('\u{600}', '\u{604}'), ('\u{606}', '\u{60b}'), ('\u{60d}', '\u{61a}'),
-        ('\u{61c}', '\u{61c}'), ('\u{61e}', '\u{61e}'), ('\u{620}', '\u{63f}'),
-        ('\u{641}', '\u{64a}'), ('\u{656}', '\u{66f}'), ('\u{671}', '\u{6dc}'),
-        ('\u{6de}', '\u{6ff}'), ('\u{750}', '\u{77f}'), ('\u{8a0}', '\u{8b4}'),
-        ('\u{8b6}', '\u{8bd}'), ('\u{8d4}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'),
-        ('\u{fb50}', '\u{fbc1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}',
-        '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfd}'),
-        ('\u{fe70}', '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{10e60}',
-        '\u{10e7e}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'),
-        ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}',
-        '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'),
-        ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}',
-        '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'),
-        ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}',
-        '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'),
-        ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}',
-        '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'),
-        ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}',
-        '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'),
-        ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}',
-        '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'),
-        ('\u{1eeab}', '\u{1eebb}'), ('\u{1eef0}', '\u{1eef1}')
-    ];
+impl Iterator for SimpleFoldIter {
+    type Item = char;
 
-    pub const Armenian_table: &'static [(char, char)] = &[
-        ('\u{531}', '\u{556}'), ('\u{559}', '\u{55f}'), ('\u{561}', '\u{587}'),
-        ('\u{58a}', '\u{58a}'), ('\u{58d}', '\u{58f}'), ('\u{fb13}',
-        '\u{fb17}')
-    ];
-
-    pub const Avestan_table: &'static [(char, char)] = &[
-        ('\u{10b00}', '\u{10b35}'), ('\u{10b39}', '\u{10b3f}')
-    ];
-
-    pub const Balinese_table: &'static [(char, char)] = &[
-        ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b7c}')
-    ];
-
-    pub const Bamum_table: &'static [(char, char)] = &[
-        ('\u{a6a0}', '\u{a6f7}'), ('\u{16800}', '\u{16a38}')
-    ];
-
-    pub const Bassa_Vah_table: &'static [(char, char)] = &[
-        ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}', '\u{16af5}')
-    ];
-
-    pub const Batak_table: &'static [(char, char)] = &[
-        ('\u{1bc0}', '\u{1bf3}'), ('\u{1bfc}', '\u{1bff}')
-    ];
-
-    pub const Bengali_table: &'static [(char, char)] = &[
-        ('\u{980}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9fd}')
-    ];
-
-    pub const Bhaiksuki_table: &'static [(char, char)] = &[
-        ('\u{11c00}', '\u{11c08}'), ('\u{11c0a}', '\u{11c36}'), ('\u{11c38}',
-        '\u{11c45}'), ('\u{11c50}', '\u{11c6c}')
-    ];
-
-    pub const Bopomofo_table: &'static [(char, char)] = &[
-        ('\u{2ea}', '\u{2eb}'), ('\u{3105}', '\u{312e}'), ('\u{31a0}',
-        '\u{31ba}')
-    ];
-
-    pub const Brahmi_table: &'static [(char, char)] = &[
-        ('\u{11000}', '\u{1104d}'), ('\u{11052}', '\u{1106f}'), ('\u{1107f}',
-        '\u{1107f}')
-    ];
-
-    pub const Braille_table: &'static [(char, char)] = &[
-        ('\u{2800}', '\u{28ff}')
-    ];
-
-    pub const Buginese_table: &'static [(char, char)] = &[
-        ('\u{1a00}', '\u{1a1b}'), ('\u{1a1e}', '\u{1a1f}')
-    ];
-
-    pub const Buhid_table: &'static [(char, char)] = &[
-        ('\u{1740}', '\u{1753}')
-    ];
-
-    pub const Canadian_Aboriginal_table: &'static [(char, char)] = &[
-        ('\u{1400}', '\u{167f}'), ('\u{18b0}', '\u{18f5}')
-    ];
-
-    pub const Carian_table: &'static [(char, char)] = &[
-        ('\u{102a0}', '\u{102d0}')
-    ];
-
-    pub const Caucasian_Albanian_table: &'static [(char, char)] = &[
-        ('\u{10530}', '\u{10563}'), ('\u{1056f}', '\u{1056f}')
-    ];
-
-    pub const Chakma_table: &'static [(char, char)] = &[
-        ('\u{11100}', '\u{11134}'), ('\u{11136}', '\u{11143}')
-    ];
-
-    pub const Cham_table: &'static [(char, char)] = &[
-        ('\u{aa00}', '\u{aa36}'), ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}',
-        '\u{aa59}'), ('\u{aa5c}', '\u{aa5f}')
-    ];
-
-    pub const Cherokee_table: &'static [(char, char)] = &[
-        ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{ab70}',
-        '\u{abbf}')
-    ];
-
-    pub const Common_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{40}'), ('\u{5b}', '\u{60}'), ('\u{7b}', '\u{a9}'),
-        ('\u{ab}', '\u{b9}'), ('\u{bb}', '\u{bf}'), ('\u{d7}', '\u{d7}'),
-        ('\u{f7}', '\u{f7}'), ('\u{2b9}', '\u{2df}'), ('\u{2e5}', '\u{2e9}'),
-        ('\u{2ec}', '\u{2ff}'), ('\u{374}', '\u{374}'), ('\u{37e}', '\u{37e}'),
-        ('\u{385}', '\u{385}'), ('\u{387}', '\u{387}'), ('\u{589}', '\u{589}'),
-        ('\u{605}', '\u{605}'), ('\u{60c}', '\u{60c}'), ('\u{61b}', '\u{61b}'),
-        ('\u{61f}', '\u{61f}'), ('\u{640}', '\u{640}'), ('\u{6dd}', '\u{6dd}'),
-        ('\u{8e2}', '\u{8e2}'), ('\u{964}', '\u{965}'), ('\u{e3f}', '\u{e3f}'),
-        ('\u{fd5}', '\u{fd8}'), ('\u{10fb}', '\u{10fb}'), ('\u{16eb}',
-        '\u{16ed}'), ('\u{1735}', '\u{1736}'), ('\u{1802}', '\u{1803}'),
-        ('\u{1805}', '\u{1805}'), ('\u{1cd3}', '\u{1cd3}'), ('\u{1ce1}',
-        '\u{1ce1}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf3}'),
-        ('\u{1cf5}', '\u{1cf7}'), ('\u{2000}', '\u{200b}'), ('\u{200e}',
-        '\u{2064}'), ('\u{2066}', '\u{2070}'), ('\u{2074}', '\u{207e}'),
-        ('\u{2080}', '\u{208e}'), ('\u{20a0}', '\u{20bf}'), ('\u{2100}',
-        '\u{2125}'), ('\u{2127}', '\u{2129}'), ('\u{212c}', '\u{2131}'),
-        ('\u{2133}', '\u{214d}'), ('\u{214f}', '\u{215f}'), ('\u{2189}',
-        '\u{218b}'), ('\u{2190}', '\u{2426}'), ('\u{2440}', '\u{244a}'),
-        ('\u{2460}', '\u{27ff}'), ('\u{2900}', '\u{2b73}'), ('\u{2b76}',
-        '\u{2b95}'), ('\u{2b98}', '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'),
-        ('\u{2bca}', '\u{2bd2}'), ('\u{2bec}', '\u{2bef}'), ('\u{2e00}',
-        '\u{2e49}'), ('\u{2ff0}', '\u{2ffb}'), ('\u{3000}', '\u{3004}'),
-        ('\u{3006}', '\u{3006}'), ('\u{3008}', '\u{3020}'), ('\u{3030}',
-        '\u{3037}'), ('\u{303c}', '\u{303f}'), ('\u{309b}', '\u{309c}'),
-        ('\u{30a0}', '\u{30a0}'), ('\u{30fb}', '\u{30fc}'), ('\u{3190}',
-        '\u{319f}'), ('\u{31c0}', '\u{31e3}'), ('\u{3220}', '\u{325f}'),
-        ('\u{327f}', '\u{32cf}'), ('\u{3358}', '\u{33ff}'), ('\u{4dc0}',
-        '\u{4dff}'), ('\u{a700}', '\u{a721}'), ('\u{a788}', '\u{a78a}'),
-        ('\u{a830}', '\u{a839}'), ('\u{a92e}', '\u{a92e}'), ('\u{a9cf}',
-        '\u{a9cf}'), ('\u{ab5b}', '\u{ab5b}'), ('\u{fd3e}', '\u{fd3f}'),
-        ('\u{fe10}', '\u{fe19}'), ('\u{fe30}', '\u{fe52}'), ('\u{fe54}',
-        '\u{fe66}'), ('\u{fe68}', '\u{fe6b}'), ('\u{feff}', '\u{feff}'),
-        ('\u{ff01}', '\u{ff20}'), ('\u{ff3b}', '\u{ff40}'), ('\u{ff5b}',
-        '\u{ff65}'), ('\u{ff70}', '\u{ff70}'), ('\u{ff9e}', '\u{ff9f}'),
-        ('\u{ffe0}', '\u{ffe6}'), ('\u{ffe8}', '\u{ffee}'), ('\u{fff9}',
-        '\u{fffd}'), ('\u{10100}', '\u{10102}'), ('\u{10107}', '\u{10133}'),
-        ('\u{10137}', '\u{1013f}'), ('\u{10190}', '\u{1019b}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{102e1}', '\u{102fb}'), ('\u{1bca0}', '\u{1bca3}'),
-        ('\u{1d000}', '\u{1d0f5}'), ('\u{1d100}', '\u{1d126}'), ('\u{1d129}',
-        '\u{1d166}'), ('\u{1d16a}', '\u{1d17a}'), ('\u{1d183}', '\u{1d184}'),
-        ('\u{1d18c}', '\u{1d1a9}'), ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d300}',
-        '\u{1d356}'), ('\u{1d360}', '\u{1d371}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d7cb}'), ('\u{1d7ce}', '\u{1d7ff}'),
-        ('\u{1f000}', '\u{1f02b}'), ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}',
-        '\u{1f0ae}'), ('\u{1f0b1}', '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'),
-        ('\u{1f0d1}', '\u{1f0f5}'), ('\u{1f100}', '\u{1f10c}'), ('\u{1f110}',
-        '\u{1f12e}'), ('\u{1f130}', '\u{1f16b}'), ('\u{1f170}', '\u{1f1ac}'),
-        ('\u{1f1e6}', '\u{1f1ff}'), ('\u{1f201}', '\u{1f202}'), ('\u{1f210}',
-        '\u{1f23b}'), ('\u{1f240}', '\u{1f248}'), ('\u{1f250}', '\u{1f251}'),
-        ('\u{1f260}', '\u{1f265}'), ('\u{1f300}', '\u{1f6d4}'), ('\u{1f6e0}',
-        '\u{1f6ec}'), ('\u{1f6f0}', '\u{1f6f8}'), ('\u{1f700}', '\u{1f773}'),
-        ('\u{1f780}', '\u{1f7d4}'), ('\u{1f800}', '\u{1f80b}'), ('\u{1f810}',
-        '\u{1f847}'), ('\u{1f850}', '\u{1f859}'), ('\u{1f860}', '\u{1f887}'),
-        ('\u{1f890}', '\u{1f8ad}'), ('\u{1f900}', '\u{1f90b}'), ('\u{1f910}',
-        '\u{1f93e}'), ('\u{1f940}', '\u{1f94c}'), ('\u{1f950}', '\u{1f96b}'),
-        ('\u{1f980}', '\u{1f997}'), ('\u{1f9c0}', '\u{1f9c0}'), ('\u{1f9d0}',
-        '\u{1f9e6}'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}')
-    ];
-
-    pub const Coptic_table: &'static [(char, char)] = &[
-        ('\u{3e2}', '\u{3ef}'), ('\u{2c80}', '\u{2cf3}'), ('\u{2cf9}',
-        '\u{2cff}')
-    ];
-
-    pub const Cuneiform_table: &'static [(char, char)] = &[
-        ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12470}',
-        '\u{12474}'), ('\u{12480}', '\u{12543}')
-    ];
-
-    pub const Cypriot_table: &'static [(char, char)] = &[
-        ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}',
-        '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'),
-        ('\u{1083f}', '\u{1083f}')
-    ];
-
-    pub const Cyrillic_table: &'static [(char, char)] = &[
-        ('\u{400}', '\u{484}'), ('\u{487}', '\u{52f}'), ('\u{1c80}',
-        '\u{1c88}'), ('\u{1d2b}', '\u{1d2b}'), ('\u{1d78}', '\u{1d78}'),
-        ('\u{2de0}', '\u{2dff}'), ('\u{a640}', '\u{a69f}'), ('\u{fe2e}',
-        '\u{fe2f}')
-    ];
-
-    pub const Deseret_table: &'static [(char, char)] = &[
-        ('\u{10400}', '\u{1044f}')
-    ];
-
-    pub const Devanagari_table: &'static [(char, char)] = &[
-        ('\u{900}', '\u{950}'), ('\u{953}', '\u{963}'), ('\u{966}', '\u{97f}'),
-        ('\u{a8e0}', '\u{a8fd}')
-    ];
-
-    pub const Duployan_table: &'static [(char, char)] = &[
-        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}',
-        '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1bc9c}', '\u{1bc9f}')
-    ];
-
-    pub const Egyptian_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{13000}', '\u{1342e}')
-    ];
-
-    pub const Elbasan_table: &'static [(char, char)] = &[
-        ('\u{10500}', '\u{10527}')
-    ];
-
-    pub const Ethiopic_table: &'static [(char, char)] = &[
-        ('\u{1200}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}',
-        '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'),
-        ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}',
-        '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'),
-        ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}',
-        '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'),
-        ('\u{1318}', '\u{135a}'), ('\u{135d}', '\u{137c}'), ('\u{1380}',
-        '\u{1399}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'),
-        ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}',
-        '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'),
-        ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}')
-    ];
-
-    pub const Georgian_table: &'static [(char, char)] = &[
-        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}',
-        '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{10ff}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}')
-    ];
-
-    pub const Glagolitic_table: &'static [(char, char)] = &[
-        ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{1e000}',
-        '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
-        ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}')
-    ];
-
-    pub const Gothic_table: &'static [(char, char)] = &[
-        ('\u{10330}', '\u{1034a}')
-    ];
-
-    pub const Grantha_table: &'static [(char, char)] = &[
-        ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}',
-        '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'),
-        ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133c}',
-        '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}', '\u{1134d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'), ('\u{1135d}',
-        '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}')
-    ];
-
-    pub const Greek_table: &'static [(char, char)] = &[
-        ('\u{370}', '\u{373}'), ('\u{375}', '\u{377}'), ('\u{37a}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{384}', '\u{384}'), ('\u{386}', '\u{386}'),
-        ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3e1}'), ('\u{3f0}', '\u{3ff}'), ('\u{1d26}',
-        '\u{1d2a}'), ('\u{1d5d}', '\u{1d61}'), ('\u{1d66}', '\u{1d6a}'),
-        ('\u{1dbf}', '\u{1dbf}'), ('\u{1f00}', '\u{1f15}'), ('\u{1f18}',
-        '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}',
-        '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fdd}', '\u{1fef}'),
-        ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffe}'), ('\u{2126}',
-        '\u{2126}'), ('\u{ab65}', '\u{ab65}'), ('\u{10140}', '\u{1018e}'),
-        ('\u{101a0}', '\u{101a0}'), ('\u{1d200}', '\u{1d245}')
-    ];
-
-    pub const Gujarati_table: &'static [(char, char)] = &[
-        ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'),
-        ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae3}'),
-        ('\u{ae6}', '\u{af1}'), ('\u{af9}', '\u{aff}')
-    ];
-
-    pub const Gurmukhi_table: &'static [(char, char)] = &[
-        ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'),
-        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'),
-        ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'),
-        ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a66}', '\u{a75}')
-    ];
-
-    pub const Han_table: &'static [(char, char)] = &[
-        ('\u{2e80}', '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}',
-        '\u{2fd5}'), ('\u{3005}', '\u{3005}'), ('\u{3007}', '\u{3007}'),
-        ('\u{3021}', '\u{3029}'), ('\u{3038}', '\u{303b}'), ('\u{3400}',
-        '\u{4db5}'), ('\u{4e00}', '\u{9fea}'), ('\u{f900}', '\u{fa6d}'),
-        ('\u{fa70}', '\u{fad9}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}',
-        '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'),
-        ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Hangul_table: &'static [(char, char)] = &[
-        ('\u{1100}', '\u{11ff}'), ('\u{302e}', '\u{302f}'), ('\u{3131}',
-        '\u{318e}'), ('\u{3200}', '\u{321e}'), ('\u{3260}', '\u{327e}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}',
-        '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{ffa0}', '\u{ffbe}'),
-        ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}',
-        '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}')
-    ];
-
-    pub const Hanunoo_table: &'static [(char, char)] = &[
-        ('\u{1720}', '\u{1734}')
-    ];
-
-    pub const Hatran_table: &'static [(char, char)] = &[
-        ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{108fb}',
-        '\u{108ff}')
-    ];
-
-    pub const Hebrew_table: &'static [(char, char)] = &[
-        ('\u{591}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f4}'),
-        ('\u{fb1d}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fb4f}')
-    ];
-
-    pub const Hiragana_table: &'static [(char, char)] = &[
-        ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{1b001}',
-        '\u{1b11e}'), ('\u{1f200}', '\u{1f200}')
-    ];
-
-    pub const Imperial_Aramaic_table: &'static [(char, char)] = &[
-        ('\u{10840}', '\u{10855}'), ('\u{10857}', '\u{1085f}')
-    ];
-
-    pub const Inherited_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{485}', '\u{486}'), ('\u{64b}', '\u{655}'),
-        ('\u{670}', '\u{670}'), ('\u{951}', '\u{952}'), ('\u{1ab0}',
-        '\u{1abe}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'),
-        ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}',
-        '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), ('\u{1dc0}', '\u{1df9}'),
-        ('\u{1dfb}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}',
-        '\u{20f0}'), ('\u{302a}', '\u{302d}'), ('\u{3099}', '\u{309a}'),
-        ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2d}'), ('\u{101fd}',
-        '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{1d167}', '\u{1d169}'),
-        ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}',
-        '\u{1d1ad}'), ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const Inscriptional_Pahlavi_table: &'static [(char, char)] = &[
-        ('\u{10b60}', '\u{10b72}'), ('\u{10b78}', '\u{10b7f}')
-    ];
-
-    pub const Inscriptional_Parthian_table: &'static [(char, char)] = &[
-        ('\u{10b40}', '\u{10b55}'), ('\u{10b58}', '\u{10b5f}')
-    ];
-
-    pub const Javanese_table: &'static [(char, char)] = &[
-        ('\u{a980}', '\u{a9cd}'), ('\u{a9d0}', '\u{a9d9}'), ('\u{a9de}',
-        '\u{a9df}')
-    ];
-
-    pub const Kaithi_table: &'static [(char, char)] = &[
-        ('\u{11080}', '\u{110c1}')
-    ];
-
-    pub const Kannada_table: &'static [(char, char)] = &[
-        ('\u{c80}', '\u{c83}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
-        ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'),
-        ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'),
-        ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}')
-    ];
-
-    pub const Katakana_table: &'static [(char, char)] = &[
-        ('\u{30a1}', '\u{30fa}'), ('\u{30fd}', '\u{30ff}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{32d0}', '\u{32fe}'), ('\u{3300}', '\u{3357}'),
-        ('\u{ff66}', '\u{ff6f}'), ('\u{ff71}', '\u{ff9d}'), ('\u{1b000}',
-        '\u{1b000}')
-    ];
-
-    pub const Kayah_Li_table: &'static [(char, char)] = &[
-        ('\u{a900}', '\u{a92d}'), ('\u{a92f}', '\u{a92f}')
-    ];
-
-    pub const Kharoshthi_table: &'static [(char, char)] = &[
-        ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
-        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'),
-        ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a47}'), ('\u{10a50}',
-        '\u{10a58}')
-    ];
-
-    pub const Khmer_table: &'static [(char, char)] = &[
-        ('\u{1780}', '\u{17dd}'), ('\u{17e0}', '\u{17e9}'), ('\u{17f0}',
-        '\u{17f9}'), ('\u{19e0}', '\u{19ff}')
-    ];
-
-    pub const Khojki_table: &'static [(char, char)] = &[
-        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1123e}')
-    ];
-
-    pub const Khudawadi_table: &'static [(char, char)] = &[
-        ('\u{112b0}', '\u{112ea}'), ('\u{112f0}', '\u{112f9}')
-    ];
-
-    pub const Lao_table: &'static [(char, char)] = &[
-        ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'),
-        ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'),
-        ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'),
-        ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb9}'),
-        ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{ec8}', '\u{ecd}'), ('\u{ed0}', '\u{ed9}'), ('\u{edc}', '\u{edf}')
-    ];
-
-    pub const Latin_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'),
-        ('\u{f8}', '\u{2b8}'), ('\u{2e0}', '\u{2e4}'), ('\u{1d00}', '\u{1d25}'),
-        ('\u{1d2c}', '\u{1d5c}'), ('\u{1d62}', '\u{1d65}'), ('\u{1d6b}',
-        '\u{1d77}'), ('\u{1d79}', '\u{1dbe}'), ('\u{1e00}', '\u{1eff}'),
-        ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}',
-        '\u{209c}'), ('\u{212a}', '\u{212b}'), ('\u{2132}', '\u{2132}'),
-        ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c60}',
-        '\u{2c7f}'), ('\u{a722}', '\u{a787}'), ('\u{a78b}', '\u{a7ae}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a7ff}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab64}'), ('\u{fb00}', '\u{fb06}'),
-        ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}')
-    ];
-
-    pub const Lepcha_table: &'static [(char, char)] = &[
-        ('\u{1c00}', '\u{1c37}'), ('\u{1c3b}', '\u{1c49}'), ('\u{1c4d}',
-        '\u{1c4f}')
-    ];
-
-    pub const Limbu_table: &'static [(char, char)] = &[
-        ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'), ('\u{1930}',
-        '\u{193b}'), ('\u{1940}', '\u{1940}'), ('\u{1944}', '\u{194f}')
-    ];
-
-    pub const Linear_A_table: &'static [(char, char)] = &[
-        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}',
-        '\u{10767}')
-    ];
-
-    pub const Linear_B_table: &'static [(char, char)] = &[
-        ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}',
-        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'),
-        ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}')
-    ];
-
-    pub const Lisu_table: &'static [(char, char)] = &[
-        ('\u{a4d0}', '\u{a4ff}')
-    ];
-
-    pub const Lycian_table: &'static [(char, char)] = &[
-        ('\u{10280}', '\u{1029c}')
-    ];
-
-    pub const Lydian_table: &'static [(char, char)] = &[
-        ('\u{10920}', '\u{10939}'), ('\u{1093f}', '\u{1093f}')
-    ];
-
-    pub const Mahajani_table: &'static [(char, char)] = &[
-        ('\u{11150}', '\u{11176}')
-    ];
-
-    pub const Malayalam_table: &'static [(char, char)] = &[
-        ('\u{d00}', '\u{d03}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d44}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4f}'),
-        ('\u{d54}', '\u{d63}'), ('\u{d66}', '\u{d7f}')
-    ];
-
-    pub const Mandaic_table: &'static [(char, char)] = &[
-        ('\u{840}', '\u{85b}'), ('\u{85e}', '\u{85e}')
-    ];
-
-    pub const Manichaean_table: &'static [(char, char)] = &[
-        ('\u{10ac0}', '\u{10ae6}'), ('\u{10aeb}', '\u{10af6}')
-    ];
-
-    pub const Marchen_table: &'static [(char, char)] = &[
-        ('\u{11c70}', '\u{11c8f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}',
-        '\u{11cb6}')
-    ];
-
-    pub const Masaram_Gondi_table: &'static [(char, char)] = &[
-        ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}',
-        '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'),
-        ('\u{11d3f}', '\u{11d47}'), ('\u{11d50}', '\u{11d59}')
-    ];
-
-    pub const Meetei_Mayek_table: &'static [(char, char)] = &[
-        ('\u{aae0}', '\u{aaf6}'), ('\u{abc0}', '\u{abed}'), ('\u{abf0}',
-        '\u{abf9}')
-    ];
-
-    pub const Mende_Kikakui_table: &'static [(char, char)] = &[
-        ('\u{1e800}', '\u{1e8c4}'), ('\u{1e8c7}', '\u{1e8d6}')
-    ];
-
-    pub const Meroitic_Cursive_table: &'static [(char, char)] = &[
-        ('\u{109a0}', '\u{109b7}'), ('\u{109bc}', '\u{109cf}'), ('\u{109d2}',
-        '\u{109ff}')
-    ];
-
-    pub const Meroitic_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{10980}', '\u{1099f}')
-    ];
-
-    pub const Miao_table: &'static [(char, char)] = &[
-        ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'), ('\u{16f8f}',
-        '\u{16f9f}')
-    ];
-
-    pub const Modi_table: &'static [(char, char)] = &[
-        ('\u{11600}', '\u{11644}'), ('\u{11650}', '\u{11659}')
-    ];
-
-    pub const Mongolian_table: &'static [(char, char)] = &[
-        ('\u{1800}', '\u{1801}'), ('\u{1804}', '\u{1804}'), ('\u{1806}',
-        '\u{180e}'), ('\u{1810}', '\u{1819}'), ('\u{1820}', '\u{1877}'),
-        ('\u{1880}', '\u{18aa}'), ('\u{11660}', '\u{1166c}')
-    ];
-
-    pub const Mro_table: &'static [(char, char)] = &[
-        ('\u{16a40}', '\u{16a5e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16a6e}',
-        '\u{16a6f}')
-    ];
-
-    pub const Multani_table: &'static [(char, char)] = &[
-        ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}',
-        '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a9}')
-    ];
-
-    pub const Myanmar_table: &'static [(char, char)] = &[
-        ('\u{1000}', '\u{109f}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa60}',
-        '\u{aa7f}')
-    ];
-
-    pub const Nabataean_table: &'static [(char, char)] = &[
-        ('\u{10880}', '\u{1089e}'), ('\u{108a7}', '\u{108af}')
-    ];
-
-    pub const New_Tai_Lue_table: &'static [(char, char)] = &[
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{19d0}',
-        '\u{19da}'), ('\u{19de}', '\u{19df}')
-    ];
-
-    pub const Newa_table: &'static [(char, char)] = &[
-        ('\u{11400}', '\u{11459}'), ('\u{1145b}', '\u{1145b}'), ('\u{1145d}',
-        '\u{1145d}')
-    ];
-
-    pub const Nko_table: &'static [(char, char)] = &[
-        ('\u{7c0}', '\u{7fa}')
-    ];
-
-    pub const Nushu_table: &'static [(char, char)] = &[
-        ('\u{16fe1}', '\u{16fe1}'), ('\u{1b170}', '\u{1b2fb}')
-    ];
-
-    pub const Ogham_table: &'static [(char, char)] = &[
-        ('\u{1680}', '\u{169c}')
-    ];
-
-    pub const Ol_Chiki_table: &'static [(char, char)] = &[
-        ('\u{1c50}', '\u{1c7f}')
-    ];
-
-    pub const Old_Hungarian_table: &'static [(char, char)] = &[
-        ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{10cfa}',
-        '\u{10cff}')
-    ];
-
-    pub const Old_Italic_table: &'static [(char, char)] = &[
-        ('\u{10300}', '\u{10323}'), ('\u{1032d}', '\u{1032f}')
-    ];
-
-    pub const Old_North_Arabian_table: &'static [(char, char)] = &[
-        ('\u{10a80}', '\u{10a9f}')
-    ];
-
-    pub const Old_Permic_table: &'static [(char, char)] = &[
-        ('\u{10350}', '\u{1037a}')
-    ];
-
-    pub const Old_Persian_table: &'static [(char, char)] = &[
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103d5}')
-    ];
-
-    pub const Old_South_Arabian_table: &'static [(char, char)] = &[
-        ('\u{10a60}', '\u{10a7f}')
-    ];
-
-    pub const Old_Turkic_table: &'static [(char, char)] = &[
-        ('\u{10c00}', '\u{10c48}')
-    ];
-
-    pub const Oriya_table: &'static [(char, char)] = &[
-        ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'), ('\u{b47}', '\u{b48}'),
-        ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b77}')
-    ];
-
-    pub const Osage_table: &'static [(char, char)] = &[
-        ('\u{104b0}', '\u{104d3}'), ('\u{104d8}', '\u{104fb}')
-    ];
-
-    pub const Osmanya_table: &'static [(char, char)] = &[
-        ('\u{10480}', '\u{1049d}'), ('\u{104a0}', '\u{104a9}')
-    ];
-
-    pub const Pahawh_Hmong_table: &'static [(char, char)] = &[
-        ('\u{16b00}', '\u{16b45}'), ('\u{16b50}', '\u{16b59}'), ('\u{16b5b}',
-        '\u{16b61}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}')
-    ];
-
-    pub const Palmyrene_table: &'static [(char, char)] = &[
-        ('\u{10860}', '\u{1087f}')
-    ];
-
-    pub const Pau_Cin_Hau_table: &'static [(char, char)] = &[
-        ('\u{11ac0}', '\u{11af8}')
-    ];
-
-    pub const Phags_Pa_table: &'static [(char, char)] = &[
-        ('\u{a840}', '\u{a877}')
-    ];
-
-    pub const Phoenician_table: &'static [(char, char)] = &[
-        ('\u{10900}', '\u{1091b}'), ('\u{1091f}', '\u{1091f}')
-    ];
-
-    pub const Psalter_Pahlavi_table: &'static [(char, char)] = &[
-        ('\u{10b80}', '\u{10b91}'), ('\u{10b99}', '\u{10b9c}'), ('\u{10ba9}',
-        '\u{10baf}')
-    ];
-
-    pub const Rejang_table: &'static [(char, char)] = &[
-        ('\u{a930}', '\u{a953}'), ('\u{a95f}', '\u{a95f}')
-    ];
-
-    pub const Runic_table: &'static [(char, char)] = &[
-        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}')
-    ];
-
-    pub const Samaritan_table: &'static [(char, char)] = &[
-        ('\u{800}', '\u{82d}'), ('\u{830}', '\u{83e}')
-    ];
-
-    pub const Saurashtra_table: &'static [(char, char)] = &[
-        ('\u{a880}', '\u{a8c5}'), ('\u{a8ce}', '\u{a8d9}')
-    ];
-
-    pub const Sharada_table: &'static [(char, char)] = &[
-        ('\u{11180}', '\u{111cd}'), ('\u{111d0}', '\u{111df}')
-    ];
-
-    pub const Shavian_table: &'static [(char, char)] = &[
-        ('\u{10450}', '\u{1047f}')
-    ];
-
-    pub const Siddham_table: &'static [(char, char)] = &[
-        ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115dd}')
-    ];
-
-    pub const SignWriting_table: &'static [(char, char)] = &[
-        ('\u{1d800}', '\u{1da8b}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}',
-        '\u{1daaf}')
-    ];
-
-    pub const Sinhala_table: &'static [(char, char)] = &[
-        ('\u{d82}', '\u{d83}'), ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'),
-        ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{de6}', '\u{def}'), ('\u{df2}', '\u{df4}'),
-        ('\u{111e1}', '\u{111f4}')
-    ];
-
-    pub const Sora_Sompeng_table: &'static [(char, char)] = &[
-        ('\u{110d0}', '\u{110e8}'), ('\u{110f0}', '\u{110f9}')
-    ];
-
-    pub const Soyombo_table: &'static [(char, char)] = &[
-        ('\u{11a50}', '\u{11a83}'), ('\u{11a86}', '\u{11a9c}'), ('\u{11a9e}',
-        '\u{11aa2}')
-    ];
-
-    pub const Sundanese_table: &'static [(char, char)] = &[
-        ('\u{1b80}', '\u{1bbf}'), ('\u{1cc0}', '\u{1cc7}')
-    ];
-
-    pub const Syloti_Nagri_table: &'static [(char, char)] = &[
-        ('\u{a800}', '\u{a82b}')
-    ];
-
-    pub const Syriac_table: &'static [(char, char)] = &[
-        ('\u{700}', '\u{70d}'), ('\u{70f}', '\u{74a}'), ('\u{74d}', '\u{74f}'),
-        ('\u{860}', '\u{86a}')
-    ];
-
-    pub const Tagalog_table: &'static [(char, char)] = &[
-        ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1714}')
-    ];
-
-    pub const Tagbanwa_table: &'static [(char, char)] = &[
-        ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1772}',
-        '\u{1773}')
-    ];
-
-    pub const Tai_Le_table: &'static [(char, char)] = &[
-        ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}')
-    ];
-
-    pub const Tai_Tham_table: &'static [(char, char)] = &[
-        ('\u{1a20}', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}',
-        '\u{1a89}'), ('\u{1a90}', '\u{1a99}'), ('\u{1aa0}', '\u{1aad}')
-    ];
-
-    pub const Tai_Viet_table: &'static [(char, char)] = &[
-        ('\u{aa80}', '\u{aac2}'), ('\u{aadb}', '\u{aadf}')
-    ];
-
-    pub const Takri_table: &'static [(char, char)] = &[
-        ('\u{11680}', '\u{116b7}'), ('\u{116c0}', '\u{116c9}')
-    ];
-
-    pub const Tamil_table: &'static [(char, char)] = &[
-        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'),
-        ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{be6}', '\u{bfa}')
-    ];
-
-    pub const Tangut_table: &'static [(char, char)] = &[
-        ('\u{16fe0}', '\u{16fe0}'), ('\u{17000}', '\u{187ec}'), ('\u{18800}',
-        '\u{18af2}')
-    ];
-
-    pub const Telugu_table: &'static [(char, char)] = &[
-        ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'), ('\u{c66}', '\u{c6f}'),
-        ('\u{c78}', '\u{c7f}')
-    ];
-
-    pub const Thaana_table: &'static [(char, char)] = &[
-        ('\u{780}', '\u{7b1}')
-    ];
-
-    pub const Thai_table: &'static [(char, char)] = &[
-        ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e5b}')
-    ];
-
-    pub const Tibetan_table: &'static [(char, char)] = &[
-        ('\u{f00}', '\u{f47}'), ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fbe}', '\u{fcc}'), ('\u{fce}', '\u{fd4}'),
-        ('\u{fd9}', '\u{fda}')
-    ];
-
-    pub const Tifinagh_table: &'static [(char, char)] = &[
-        ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d70}'), ('\u{2d7f}',
-        '\u{2d7f}')
-    ];
-
-    pub const Tirhuta_table: &'static [(char, char)] = &[
-        ('\u{11480}', '\u{114c7}'), ('\u{114d0}', '\u{114d9}')
-    ];
+    fn next(&mut self) -> Option<char> {
+        self.0.next().map(|c| *c)
+    }
+}
 
-    pub const Ugaritic_table: &'static [(char, char)] = &[
-        ('\u{10380}', '\u{1039d}'), ('\u{1039f}', '\u{1039f}')
-    ];
+/// Return an iterator over the equivalence class of simple case mappings
+/// for the given codepoint. The equivalence class does not include the
+/// given codepoint.
+///
+/// If the equivalence class is empty, then this returns, as an `Err`, the
+/// next scalar value that has a non-empty equivalence class, if it exists.
+/// If no such scalar value exists, then `Err(None)` is returned. This lets
+/// callers avoid calling `simple_fold` more than they need to, since there
+/// is some cost to fetching the equivalence class.
+pub fn simple_fold(c: char) -> result::Result<SimpleFoldIter, Option<char>> {
+    CASE_FOLDING_SIMPLE
+        .binary_search_by_key(&c, |&(c1, _)| c1)
+        .map(|i| SimpleFoldIter(CASE_FOLDING_SIMPLE[i].1.iter()))
+        .map_err(|i| {
+            if i >= CASE_FOLDING_SIMPLE.len() {
+                None
+            } else {
+                Some(CASE_FOLDING_SIMPLE[i].0)
+            }
+        })
+}
 
-    pub const Vai_table: &'static [(char, char)] = &[
-        ('\u{a500}', '\u{a62b}')
-    ];
+/// Returns true if and only if the given (inclusive) range contains at least
+/// one Unicode scalar value that has a non-empty non-trivial simple case
+/// mapping.
+///
+/// This function panics if `end < start`.
+pub fn contains_simple_case_mapping(start: char, end: char) -> bool {
+    assert!(start <= end);
+    CASE_FOLDING_SIMPLE
+        .binary_search_by(|&(c, _)| {
+            if start <= c && c <= end {
+                Ordering::Equal
+            } else if c > end {
+                Ordering::Greater
+            } else {
+                Ordering::Less
+            }
+        }).is_ok()
+}
 
-    pub const Warang_Citi_table: &'static [(char, char)] = &[
-        ('\u{118a0}', '\u{118f2}'), ('\u{118ff}', '\u{118ff}')
-    ];
+/// A query for finding a character class defined by Unicode. This supports
+/// either use of a property name directly, or lookup by property value. The
+/// former generally refers to Binary properties (see UTS#44, Table 8), but
+/// as a special exception (see UTS#18, Section 1.2) both general categories
+/// (an enumeration) and scripts (a catalog) are supported as if each of their
+/// possible values were a binary property.
+///
+/// In all circumstances, property names and values are normalized and
+/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
+///
+/// The lifetime `'a` refers to the shorter of the lifetimes of property name
+/// and property value.
+#[derive(Debug)]
+pub enum ClassQuery<'a> {
+    /// Return a class named by a single letter, which is looked up in the
+    /// same way as `Binary` (property, general category, then script).
+    OneLetter(char),
+    /// Return a class corresponding to a Unicode binary property.
+    ///
+    /// Note that, by special exception (see UTS#18, Section 1.2), both
+    /// general category values and script values are permitted here as if
+    /// they were a binary property.
+    Binary(&'a str),
+    /// Return a class corresponding to all codepoints whose property
+    /// (identified by `property_name`) corresponds to the given value
+    /// (identified by `property_value`).
+    ByValue {
+        /// A property name.
+        property_name: &'a str,
+        /// A property value.
+        property_value: &'a str,
+    },
+}
 
-    pub const Yi_table: &'static [(char, char)] = &[
-        ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}')
-    ];
+impl<'a> ClassQuery<'a> {
+    fn canonicalize(&self) -> Result<CanonicalClassQuery> {
+        match *self {
+            ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
+            ClassQuery::Binary(name) => self.canonical_binary(name),
+            ClassQuery::ByValue { property_name, property_value } => {
+                let property_name = normalize(property_name);
+                let property_value = normalize(property_value);
+
+                let canon_name = match canonical_prop(&property_name) {
+                    None => return Err(Error::PropertyNotFound),
+                    Some(canon_name) => canon_name,
+                };
+                Ok(match canon_name {
+                    "General_Category" => {
+                        let canon = match canonical_gencat(&property_value) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::GeneralCategory(canon)
+                    }
+                    "Script" => {
+                        let canon = match canonical_script(&property_value) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::Script(canon)
+                    }
+                    _ => {
+                        let vals = match property_values(canon_name) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(vals) => vals,
+                        };
+                        let canon_val = match canonical_value(
+                            vals,
+                            &property_value,
+                        ) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon_val) => canon_val,
+                        };
+                        CanonicalClassQuery::ByValue {
+                            property_name: canon_name,
+                            property_value: canon_val,
+                        }
+                    }
+                })
+            }
+        }
+    }
+
+    fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
+        let norm = normalize(name);
+
+        if let Some(canon) = canonical_prop(&norm) {
+            return Ok(CanonicalClassQuery::Binary(canon));
+        }
+        if let Some(canon) = canonical_gencat(&norm) {
+            return Ok(CanonicalClassQuery::GeneralCategory(canon));
+        }
+        if let Some(canon) = canonical_script(&norm) {
+            return Ok(CanonicalClassQuery::Script(canon));
+        }
+        Err(Error::PropertyNotFound)
+    }
+}
 
-    pub const Zanabazar_Square_table: &'static [(char, char)] = &[
-        ('\u{11a00}', '\u{11a47}')
-    ];
+/// Like ClassQuery, but its parameters have been canonicalized. This also
+/// differentiates binary properties from flattened general categories and
+/// scripts.
+#[derive(Debug)]
+enum CanonicalClassQuery {
+    /// The canonical binary property name.
+    Binary(&'static str),
+    /// The canonical general category name (or `Any`, `Assigned`, `ASCII`).
+    GeneralCategory(&'static str),
+    /// The canonical script name.
+    Script(&'static str),
+    /// An arbitrary association between property and value, both of which
+    /// have been canonicalized.
+    ///
+    /// Note that by construction, the property name of ByValue will never
+    /// be General_Category or Script. Those two cases are subsumed by the
+    /// eponymous variants.
+    ByValue {
+        /// The canonical property name.
+        property_name: &'static str,
+        /// The canonical property value.
+        property_value: &'static str,
+    },
+}
 
+/// Looks up a Unicode class given a query. If the query cannot be resolved
+/// to a supported class, then an error is returned.
+pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
+    use self::CanonicalClassQuery::*;
+
+    match try!(query.canonicalize()) {
+        Binary(name) => {
+            property_set(property_bool::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyNotFound)
+        }
+        GeneralCategory("Any") => {
+            Ok(hir_class(&[('\0', '\u{10FFFF}')]))
+        }
+        GeneralCategory("Assigned") => {
+            let mut cls =
+                try!(property_set(general_category::BY_NAME, "Unassigned")
+                    .map(hir_class)
+                    .ok_or(Error::PropertyNotFound));
+            cls.negate();
+            Ok(cls)
+        }
+        GeneralCategory("ASCII") => {
+            Ok(hir_class(&[('\0', '\x7F')]))
+        }
+        GeneralCategory(name) => {
+            property_set(general_category::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        Script(name) => {
+            property_set(script::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        ByValue { property_name: "Age", property_value } => {
+            let mut class = hir::ClassUnicode::empty();
+            for set in try!(ages(property_value)) {
+                class.union(&hir_class(set));
+            }
+            Ok(class)
+        }
+        ByValue { property_name: "Script_Extensions", property_value } => {
+            property_set(script_extension::BY_NAME, property_value)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        _ => {
+            // Any other ByValue property. What else should we support?
+            Err(Error::PropertyNotFound)
+        }
+    }
 }
 
-pub mod property {
-    pub const Join_Control_table: &'static [(char, char)] = &[
-        ('\u{200c}', '\u{200d}')
-    ];
+/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges.
+pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
+    let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
+        .iter()
+        .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+        .collect();
+    hir::ClassUnicode::new(hir_ranges)
+}
 
-    pub const Noncharacter_Code_Point_table: &'static [(char, char)] = &[
-        ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'), ('\u{1fffe}',
-        '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'), ('\u{3fffe}', '\u{3ffff}'),
-        ('\u{4fffe}', '\u{4ffff}'), ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}',
-        '\u{6ffff}'), ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
-        ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'), ('\u{bfffe}',
-        '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'), ('\u{dfffe}', '\u{dffff}'),
-        ('\u{efffe}', '\u{effff}'), ('\u{ffffe}', '\u{fffff}'), ('\u{10fffe}',
-        '\u{10ffff}')
-    ];
+fn canonical_prop(normalized_name: &str) -> Option<&'static str> {
+    ucd_util::canonical_property_name(PROPERTY_NAMES, normalized_name)
+}
 
-    pub const White_Space_table: &'static [(char, char)] = &[
-        ('\u{9}', '\u{d}'), ('\u{20}', '\u{20}'), ('\u{85}', '\u{85}'),
-        ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'), ('\u{2000}',
-        '\u{200a}'), ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-        ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
+fn canonical_gencat(normalized_value: &str) -> Option<&'static str> {
+    match normalized_value {
+        "any" => Some("Any"),
+        "assigned" => Some("Assigned"),
+        "ascii" => Some("ASCII"),
+        _ => {
+            let gencats = property_values("General_Category").unwrap();
+            canonical_value(gencats, normalized_value)
+        }
+    }
+}
 
+fn canonical_script(normalized_value: &str) -> Option<&'static str> {
+    let scripts = property_values("Script").unwrap();
+    canonical_value(scripts, normalized_value)
 }
 
-pub mod regex {
-    pub const UNICODE_CLASSES: &'static [(&'static str, &'static [(char, char)])] = &[
-        ("Adlam", super::script::Adlam_table), ("Ahom",
-        super::script::Ahom_table), ("Alphabetic",
-        super::derived_property::Alphabetic_table), ("Anatolian_Hieroglyphs",
-        super::script::Anatolian_Hieroglyphs_table), ("Arabic",
-        super::script::Arabic_table), ("Armenian",
-        super::script::Armenian_table), ("Avestan",
-        super::script::Avestan_table), ("Balinese",
-        super::script::Balinese_table), ("Bamum", super::script::Bamum_table),
-        ("Bassa_Vah", super::script::Bassa_Vah_table), ("Batak",
-        super::script::Batak_table), ("Bengali", super::script::Bengali_table),
-        ("Bhaiksuki", super::script::Bhaiksuki_table), ("Bopomofo",
-        super::script::Bopomofo_table), ("Brahmi", super::script::Brahmi_table),
-        ("Braille", super::script::Braille_table), ("Buginese",
-        super::script::Buginese_table), ("Buhid", super::script::Buhid_table),
-        ("C", super::general_category::C_table), ("Canadian_Aboriginal",
-        super::script::Canadian_Aboriginal_table), ("Carian",
-        super::script::Carian_table), ("Caucasian_Albanian",
-        super::script::Caucasian_Albanian_table), ("Cc",
-        super::general_category::Cc_table), ("Cf",
-        super::general_category::Cf_table), ("Chakma",
-        super::script::Chakma_table), ("Cham", super::script::Cham_table),
-        ("Cherokee", super::script::Cherokee_table), ("Cn",
-        super::general_category::Cn_table), ("Co",
-        super::general_category::Co_table), ("Common",
-        super::script::Common_table), ("Coptic", super::script::Coptic_table),
-        ("Cuneiform", super::script::Cuneiform_table), ("Cypriot",
-        super::script::Cypriot_table), ("Cyrillic",
-        super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
-        super::derived_property::Default_Ignorable_Code_Point_table),
-        ("Deseret", super::script::Deseret_table), ("Devanagari",
-        super::script::Devanagari_table), ("Duployan",
-        super::script::Duployan_table), ("Egyptian_Hieroglyphs",
-        super::script::Egyptian_Hieroglyphs_table), ("Elbasan",
-        super::script::Elbasan_table), ("Ethiopic",
-        super::script::Ethiopic_table), ("Georgian",
-        super::script::Georgian_table), ("Glagolitic",
-        super::script::Glagolitic_table), ("Gothic",
-        super::script::Gothic_table), ("Grantha", super::script::Grantha_table),
-        ("Grapheme_Extend", super::derived_property::Grapheme_Extend_table),
-        ("Greek", super::script::Greek_table), ("Gujarati",
-        super::script::Gujarati_table), ("Gurmukhi",
-        super::script::Gurmukhi_table), ("Han", super::script::Han_table),
-        ("Hangul", super::script::Hangul_table), ("Hanunoo",
-        super::script::Hanunoo_table), ("Hatran", super::script::Hatran_table),
-        ("Hebrew", super::script::Hebrew_table), ("Hiragana",
-        super::script::Hiragana_table), ("Imperial_Aramaic",
-        super::script::Imperial_Aramaic_table), ("Inherited",
-        super::script::Inherited_table), ("Inscriptional_Pahlavi",
-        super::script::Inscriptional_Pahlavi_table), ("Inscriptional_Parthian",
-        super::script::Inscriptional_Parthian_table), ("Javanese",
-        super::script::Javanese_table), ("Join_Control",
-        super::property::Join_Control_table), ("Kaithi",
-        super::script::Kaithi_table), ("Kannada", super::script::Kannada_table),
-        ("Katakana", super::script::Katakana_table), ("Kayah_Li",
-        super::script::Kayah_Li_table), ("Kharoshthi",
-        super::script::Kharoshthi_table), ("Khmer", super::script::Khmer_table),
-        ("Khojki", super::script::Khojki_table), ("Khudawadi",
-        super::script::Khudawadi_table), ("L",
-        super::general_category::L_table), ("LC",
-        super::general_category::LC_table), ("Lao", super::script::Lao_table),
-        ("Latin", super::script::Latin_table), ("Lepcha",
-        super::script::Lepcha_table), ("Limbu", super::script::Limbu_table),
-        ("Linear_A", super::script::Linear_A_table), ("Linear_B",
-        super::script::Linear_B_table), ("Lisu", super::script::Lisu_table),
-        ("Ll", super::general_category::Ll_table), ("Lm",
-        super::general_category::Lm_table), ("Lo",
-        super::general_category::Lo_table), ("Lowercase",
-        super::derived_property::Lowercase_table), ("Lt",
-        super::general_category::Lt_table), ("Lu",
-        super::general_category::Lu_table), ("Lycian",
-        super::script::Lycian_table), ("Lydian", super::script::Lydian_table),
-        ("M", super::general_category::M_table), ("Mahajani",
-        super::script::Mahajani_table), ("Malayalam",
-        super::script::Malayalam_table), ("Mandaic",
-        super::script::Mandaic_table), ("Manichaean",
-        super::script::Manichaean_table), ("Marchen",
-        super::script::Marchen_table), ("Masaram_Gondi",
-        super::script::Masaram_Gondi_table), ("Mc",
-        super::general_category::Mc_table), ("Me",
-        super::general_category::Me_table), ("Meetei_Mayek",
-        super::script::Meetei_Mayek_table), ("Mende_Kikakui",
-        super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
-        super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
-        super::script::Meroitic_Hieroglyphs_table), ("Miao",
-        super::script::Miao_table), ("Mn", super::general_category::Mn_table),
-        ("Modi", super::script::Modi_table), ("Mongolian",
-        super::script::Mongolian_table), ("Mro", super::script::Mro_table),
-        ("Multani", super::script::Multani_table), ("Myanmar",
-        super::script::Myanmar_table), ("N", super::general_category::N_table),
-        ("Nabataean", super::script::Nabataean_table), ("Nd",
-        super::general_category::Nd_table), ("New_Tai_Lue",
-        super::script::New_Tai_Lue_table), ("Newa", super::script::Newa_table),
-        ("Nko", super::script::Nko_table), ("Nl",
-        super::general_category::Nl_table), ("No",
-        super::general_category::No_table), ("Noncharacter_Code_Point",
-        super::property::Noncharacter_Code_Point_table), ("Nushu",
-        super::script::Nushu_table), ("Ogham", super::script::Ogham_table),
-        ("Ol_Chiki", super::script::Ol_Chiki_table), ("Old_Hungarian",
-        super::script::Old_Hungarian_table), ("Old_Italic",
-        super::script::Old_Italic_table), ("Old_North_Arabian",
-        super::script::Old_North_Arabian_table), ("Old_Permic",
-        super::script::Old_Permic_table), ("Old_Persian",
-        super::script::Old_Persian_table), ("Old_South_Arabian",
-        super::script::Old_South_Arabian_table), ("Old_Turkic",
-        super::script::Old_Turkic_table), ("Oriya", super::script::Oriya_table),
-        ("Osage", super::script::Osage_table), ("Osmanya",
-        super::script::Osmanya_table), ("P", super::general_category::P_table),
-        ("Pahawh_Hmong", super::script::Pahawh_Hmong_table), ("Palmyrene",
-        super::script::Palmyrene_table), ("Pau_Cin_Hau",
-        super::script::Pau_Cin_Hau_table), ("Pc",
-        super::general_category::Pc_table), ("Pd",
-        super::general_category::Pd_table), ("Pe",
-        super::general_category::Pe_table), ("Pf",
-        super::general_category::Pf_table), ("Phags_Pa",
-        super::script::Phags_Pa_table), ("Phoenician",
-        super::script::Phoenician_table), ("Pi",
-        super::general_category::Pi_table), ("Po",
-        super::general_category::Po_table), ("Ps",
-        super::general_category::Ps_table), ("Psalter_Pahlavi",
-        super::script::Psalter_Pahlavi_table), ("Rejang",
-        super::script::Rejang_table), ("Runic", super::script::Runic_table),
-        ("S", super::general_category::S_table), ("Samaritan",
-        super::script::Samaritan_table), ("Saurashtra",
-        super::script::Saurashtra_table), ("Sc",
-        super::general_category::Sc_table), ("Sharada",
-        super::script::Sharada_table), ("Shavian",
-        super::script::Shavian_table), ("Siddham",
-        super::script::Siddham_table), ("SignWriting",
-        super::script::SignWriting_table), ("Sinhala",
-        super::script::Sinhala_table), ("Sk",
-        super::general_category::Sk_table), ("Sm",
-        super::general_category::Sm_table), ("So",
-        super::general_category::So_table), ("Sora_Sompeng",
-        super::script::Sora_Sompeng_table), ("Soyombo",
-        super::script::Soyombo_table), ("Sundanese",
-        super::script::Sundanese_table), ("Syloti_Nagri",
-        super::script::Syloti_Nagri_table), ("Syriac",
-        super::script::Syriac_table), ("Tagalog", super::script::Tagalog_table),
-        ("Tagbanwa", super::script::Tagbanwa_table), ("Tai_Le",
-        super::script::Tai_Le_table), ("Tai_Tham",
-        super::script::Tai_Tham_table), ("Tai_Viet",
-        super::script::Tai_Viet_table), ("Takri", super::script::Takri_table),
-        ("Tamil", super::script::Tamil_table), ("Tangut",
-        super::script::Tangut_table), ("Telugu", super::script::Telugu_table),
-        ("Thaana", super::script::Thaana_table), ("Thai",
-        super::script::Thai_table), ("Tibetan", super::script::Tibetan_table),
-        ("Tifinagh", super::script::Tifinagh_table), ("Tirhuta",
-        super::script::Tirhuta_table), ("Ugaritic",
-        super::script::Ugaritic_table), ("Uppercase",
-        super::derived_property::Uppercase_table), ("Vai",
-        super::script::Vai_table), ("Warang_Citi",
-        super::script::Warang_Citi_table), ("White_Space",
-        super::property::White_Space_table), ("XID_Continue",
-        super::derived_property::XID_Continue_table), ("XID_Start",
-        super::derived_property::XID_Start_table), ("Yi",
-        super::script::Yi_table), ("Z", super::general_category::Z_table),
-        ("Zanabazar_Square", super::script::Zanabazar_Square_table), ("Zl",
-        super::general_category::Zl_table), ("Zp",
-        super::general_category::Zp_table), ("Zs",
-        super::general_category::Zs_table)
-    ];
+fn canonical_value(
+    vals: PropertyValues,
+    normalized_value: &str,
+) -> Option<&'static str> {
+    ucd_util::canonical_property_value(vals, normalized_value)
+}
 
-    pub const PERLD: &'static [(char, char)] = super::general_category::Nd_table;
+fn normalize(x: &str) -> String {
+    let mut x = x.to_string();
+    ucd_util::symbolic_name_normalize(&mut x);
+    x
+}
 
-    pub const PERLS: &'static [(char, char)] = super::property::White_Space_table;
+fn property_values(
+    canonical_property_name: &'static str,
+) -> Option<PropertyValues>
+{
+    ucd_util::property_values(PROPERTY_VALUES, canonical_property_name)
+}
 
-    pub const PERLW: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{41}', '\u{5a}'), ('\u{5f}', '\u{5f}'),
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'),
-        ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'), ('\u{300}', '\u{374}'),
-        ('\u{376}', '\u{377}'), ('\u{37a}', '\u{37d}'), ('\u{37f}', '\u{37f}'),
-        ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'),
-        ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'),
-        ('\u{483}', '\u{52f}'), ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'),
-        ('\u{561}', '\u{587}'), ('\u{591}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'),
-        ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'),
-        ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f2}'), ('\u{610}', '\u{61a}'),
-        ('\u{620}', '\u{669}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'),
-        ('\u{6df}', '\u{6e8}'), ('\u{6ea}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{74a}'), ('\u{74d}', '\u{7b1}'), ('\u{7c0}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{82d}'), ('\u{840}', '\u{85b}'),
-        ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'), ('\u{8b6}', '\u{8bd}'),
-        ('\u{8d4}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('\u{966}', '\u{96f}'),
-        ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'),
-        ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'),
-        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'),
-        ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'),
-        ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a66}', '\u{a75}'), ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'),
-        ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'),
-        ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'),
-        ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'),
-        ('\u{ae0}', '\u{ae3}'), ('\u{ae6}', '\u{aef}'), ('\u{af9}', '\u{aff}'),
-        ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'), ('\u{b47}', '\u{b48}'),
-        ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b6f}'), ('\u{b71}', '\u{b71}'),
-        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'),
-        ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'),
-        ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'),
-        ('\u{c3d}', '\u{c44}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
-        ('\u{c55}', '\u{c56}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'),
-        ('\u{c66}', '\u{c6f}'), ('\u{c80}', '\u{c83}'), ('\u{c85}', '\u{c8c}'),
-        ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'),
-        ('\u{cb5}', '\u{cb9}'), ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'),
-        ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'),
-        ('\u{ce0}', '\u{ce3}'), ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d00}', '\u{d03}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d44}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4e}'),
-        ('\u{d54}', '\u{d57}'), ('\u{d5f}', '\u{d63}'), ('\u{d66}', '\u{d6f}'),
-        ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'), ('\u{d85}', '\u{d96}'),
-        ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'),
-        ('\u{dc0}', '\u{dc6}'), ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'),
-        ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'), ('\u{de6}', '\u{def}'),
-        ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e4e}'),
-        ('\u{e50}', '\u{e59}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
-        ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'),
-        ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'),
-        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'),
-        ('\u{ead}', '\u{eb9}'), ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'),
-        ('\u{ec6}', '\u{ec6}'), ('\u{ec8}', '\u{ecd}'), ('\u{ed0}', '\u{ed9}'),
-        ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f18}', '\u{f19}'),
-        ('\u{f20}', '\u{f29}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'),
-        ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'), ('\u{f99}', '\u{fbc}'),
-        ('\u{fc6}', '\u{fc6}'), ('\u{1000}', '\u{1049}'), ('\u{1050}',
-        '\u{109d}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'),
-        ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}',
-        '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'),
-        ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}',
-        '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'),
-        ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}',
-        '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'),
-        ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}',
-        '\u{135a}'), ('\u{135d}', '\u{135f}'), ('\u{1380}', '\u{138f}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}',
-        '\u{166c}'), ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'),
-        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}',
-        '\u{170c}'), ('\u{170e}', '\u{1714}'), ('\u{1720}', '\u{1734}'),
-        ('\u{1740}', '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}',
-        '\u{1770}'), ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17d3}'),
-        ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dd}'), ('\u{17e0}',
-        '\u{17e9}'), ('\u{180b}', '\u{180d}'), ('\u{1810}', '\u{1819}'),
-        ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}',
-        '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'),
-        ('\u{1930}', '\u{193b}'), ('\u{1946}', '\u{196d}'), ('\u{1970}',
-        '\u{1974}'), ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'),
-        ('\u{19d0}', '\u{19d9}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}',
-        '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a89}'),
-        ('\u{1a90}', '\u{1a99}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1ab0}',
-        '\u{1abe}'), ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b59}'),
-        ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('\u{1c00}',
-        '\u{1c37}'), ('\u{1c40}', '\u{1c49}'), ('\u{1c4d}', '\u{1c7d}'),
-        ('\u{1c80}', '\u{1c88}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}',
-        '\u{1cf9}'), ('\u{1d00}', '\u{1df9}'), ('\u{1dfb}', '\u{1f15}'),
-        ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}',
-        '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'),
-        ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}',
-        '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'),
-        ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ffc}'), ('\u{200c}', '\u{200d}'), ('\u{203f}', '\u{2040}'),
-        ('\u{2054}', '\u{2054}'), ('\u{2071}', '\u{2071}'), ('\u{207f}',
-        '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{20d0}', '\u{20f0}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}',
-        '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2119}', '\u{211d}'),
-        ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}',
-        '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}', '\u{2139}'),
-        ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{24b6}', '\u{24e9}'),
-        ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}',
-        '\u{2ce4}'), ('\u{2ceb}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'),
-        ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}',
-        '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d7f}', '\u{2d96}'),
-        ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), ('\u{2db0}',
-        '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'),
-        ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}',
-        '\u{2dde}'), ('\u{2de0}', '\u{2dff}'), ('\u{2e2f}', '\u{2e2f}'),
-        ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{302f}'), ('\u{3031}',
-        '\u{3035}'), ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'),
-        ('\u{3099}', '\u{309a}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312e}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fea}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a62b}'), ('\u{a640}', '\u{a672}'),
-        ('\u{a674}', '\u{a67d}'), ('\u{a67f}', '\u{a6f1}'), ('\u{a717}',
-        '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ae}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a827}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a880}', '\u{a8c5}'), ('\u{a8d0}', '\u{a8d9}'),
-        ('\u{a8e0}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}',
-        '\u{a8fd}'), ('\u{a900}', '\u{a92d}'), ('\u{a930}', '\u{a953}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{a980}', '\u{a9c0}'), ('\u{a9cf}',
-        '\u{a9d9}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'),
-        ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}', '\u{aa59}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'),
-        ('\u{aae0}', '\u{aaef}'), ('\u{aaf2}', '\u{aaf6}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abea}'),
-        ('\u{abec}', '\u{abed}'), ('\u{abf0}', '\u{abf9}'), ('\u{ac00}',
-        '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'),
-        ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}',
-        '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'),
-        ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('\u{fe33}',
-        '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'), ('\u{fe70}', '\u{fe74}'),
-        ('\u{fe76}', '\u{fefc}'), ('\u{ff10}', '\u{ff19}'), ('\u{ff21}',
-        '\u{ff3a}'), ('\u{ff3f}', '\u{ff3f}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}',
-        '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}'),
-        ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}',
-        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'),
-        ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}'), ('\u{10140}',
-        '\u{10174}'), ('\u{101fd}', '\u{101fd}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{102e0}', '\u{102e0}'), ('\u{10300}',
-        '\u{1031f}'), ('\u{1032d}', '\u{1034a}'), ('\u{10350}', '\u{1037a}'),
-        ('\u{10380}', '\u{1039d}'), ('\u{103a0}', '\u{103c3}'), ('\u{103c8}',
-        '\u{103cf}'), ('\u{103d1}', '\u{103d5}'), ('\u{10400}', '\u{1049d}'),
-        ('\u{104a0}', '\u{104a9}'), ('\u{104b0}', '\u{104d3}'), ('\u{104d8}',
-        '\u{104fb}'), ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'),
-        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}',
-        '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'),
-        ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}',
-        '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'),
-        ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}',
-        '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}', '\u{10939}'),
-        ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'), ('\u{10a00}',
-        '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a13}'),
-        ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'), ('\u{10a38}',
-        '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{10a60}', '\u{10a7c}'),
-        ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}',
-        '\u{10ae6}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'),
-        ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}',
-        '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'),
-        ('\u{11000}', '\u{11046}'), ('\u{11066}', '\u{1106f}'), ('\u{1107f}',
-        '\u{110ba}'), ('\u{110d0}', '\u{110e8}'), ('\u{110f0}', '\u{110f9}'),
-        ('\u{11100}', '\u{11134}'), ('\u{11136}', '\u{1113f}'), ('\u{11150}',
-        '\u{11173}'), ('\u{11176}', '\u{11176}'), ('\u{11180}', '\u{111c4}'),
-        ('\u{111ca}', '\u{111cc}'), ('\u{111d0}', '\u{111da}'), ('\u{111dc}',
-        '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{11237}'),
-        ('\u{1123e}', '\u{1123e}'), ('\u{11280}', '\u{11286}'), ('\u{11288}',
-        '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', '\u{1129d}'),
-        ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112ea}'), ('\u{112f0}',
-        '\u{112f9}'), ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'),
-        ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}',
-        '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'),
-        ('\u{1133c}', '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}',
-        '\u{1134d}'), ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'),
-        ('\u{1135d}', '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}',
-        '\u{11374}'), ('\u{11400}', '\u{1144a}'), ('\u{11450}', '\u{11459}'),
-        ('\u{11480}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{114d0}',
-        '\u{114d9}'), ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115c0}'),
-        ('\u{115d8}', '\u{115dd}'), ('\u{11600}', '\u{11640}'), ('\u{11644}',
-        '\u{11644}'), ('\u{11650}', '\u{11659}'), ('\u{11680}', '\u{116b7}'),
-        ('\u{116c0}', '\u{116c9}'), ('\u{11700}', '\u{11719}'), ('\u{1171d}',
-        '\u{1172b}'), ('\u{11730}', '\u{11739}'), ('\u{118a0}', '\u{118e9}'),
-        ('\u{118ff}', '\u{118ff}'), ('\u{11a00}', '\u{11a3e}'), ('\u{11a47}',
-        '\u{11a47}'), ('\u{11a50}', '\u{11a83}'), ('\u{11a86}', '\u{11a99}'),
-        ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', '\u{11c08}'), ('\u{11c0a}',
-        '\u{11c36}'), ('\u{11c38}', '\u{11c40}'), ('\u{11c50}', '\u{11c59}'),
-        ('\u{11c72}', '\u{11c8f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}',
-        '\u{11cb6}'), ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'),
-        ('\u{11d0b}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}',
-        '\u{11d3d}'), ('\u{11d3f}', '\u{11d47}'), ('\u{11d50}', '\u{11d59}'),
-        ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}',
-        '\u{12543}'), ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'),
-        ('\u{16800}', '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16a60}',
-        '\u{16a69}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}', '\u{16af4}'),
-        ('\u{16b00}', '\u{16b36}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b50}',
-        '\u{16b59}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'),
-        ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'), ('\u{16f8f}',
-        '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'), ('\u{17000}', '\u{187ec}'),
-        ('\u{18800}', '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), ('\u{1b170}',
-        '\u{1b2fb}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'),
-        ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1bc9d}',
-        '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}', '\u{1d172}'),
-        ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}',
-        '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'),
-        ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}',
-        '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'),
-        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}',
-        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1d7ce}', '\u{1d7ff}'),
-        ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}',
-        '\u{1da75}'), ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
-        ('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}',
-        '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'),
-        ('\u{1e026}', '\u{1e02a}'), ('\u{1e800}', '\u{1e8c4}'), ('\u{1e8d0}',
-        '\u{1e8d6}'), ('\u{1e900}', '\u{1e94a}'), ('\u{1e950}', '\u{1e959}'),
-        ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}',
-        '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'),
-        ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}',
-        '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'),
-        ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}',
-        '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'),
-        ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}',
-        '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'),
-        ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}',
-        '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'),
-        ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}',
-        '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'),
-        ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}',
-        '\u{1eebb}'), ('\u{1f130}', '\u{1f149}'), ('\u{1f150}', '\u{1f169}'),
-        ('\u{1f170}', '\u{1f189}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}',
-        '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'),
-        ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}'), ('\u{e0100}',
-        '\u{e01ef}')
-    ];
+fn property_set(
+    name_map: &'static [(&'static str, &'static [(char, char)])],
+    canonical: &'static str,
+) -> Option<&'static [(char, char)]> {
+    name_map
+        .binary_search_by_key(&canonical, |x| x.0)
+        .ok()
+        .map(|i| name_map[i].1)
+}
 
+/// An iterator over Unicode Age sets. Each item corresponds to a set of
+/// codepoints that were added in a particular revision of Unicode. The
+/// iterator yields items in chronological order.
+#[derive(Debug)]
+struct AgeIter {
+    ages: &'static [(&'static str, &'static [(char, char)])],
 }
 
-pub mod case_folding {
-    pub const C_plus_S_both_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{61}'), ('\u{42}', '\u{62}'), ('\u{43}', '\u{63}'),
-        ('\u{44}', '\u{64}'), ('\u{45}', '\u{65}'), ('\u{46}', '\u{66}'),
-        ('\u{47}', '\u{67}'), ('\u{48}', '\u{68}'), ('\u{49}', '\u{69}'),
-        ('\u{4a}', '\u{6a}'), ('\u{4b}', '\u{6b}'), ('\u{4b}', '\u{212a}'),
-        ('\u{4c}', '\u{6c}'), ('\u{4d}', '\u{6d}'), ('\u{4e}', '\u{6e}'),
-        ('\u{4f}', '\u{6f}'), ('\u{50}', '\u{70}'), ('\u{51}', '\u{71}'),
-        ('\u{52}', '\u{72}'), ('\u{53}', '\u{73}'), ('\u{53}', '\u{17f}'),
-        ('\u{54}', '\u{74}'), ('\u{55}', '\u{75}'), ('\u{56}', '\u{76}'),
-        ('\u{57}', '\u{77}'), ('\u{58}', '\u{78}'), ('\u{59}', '\u{79}'),
-        ('\u{5a}', '\u{7a}'), ('\u{61}', '\u{41}'), ('\u{62}', '\u{42}'),
-        ('\u{63}', '\u{43}'), ('\u{64}', '\u{44}'), ('\u{65}', '\u{45}'),
-        ('\u{66}', '\u{46}'), ('\u{67}', '\u{47}'), ('\u{68}', '\u{48}'),
-        ('\u{69}', '\u{49}'), ('\u{6a}', '\u{4a}'), ('\u{6b}', '\u{4b}'),
-        ('\u{6b}', '\u{212a}'), ('\u{6c}', '\u{4c}'), ('\u{6d}', '\u{4d}'),
-        ('\u{6e}', '\u{4e}'), ('\u{6f}', '\u{4f}'), ('\u{70}', '\u{50}'),
-        ('\u{71}', '\u{51}'), ('\u{72}', '\u{52}'), ('\u{73}', '\u{53}'),
-        ('\u{73}', '\u{17f}'), ('\u{74}', '\u{54}'), ('\u{75}', '\u{55}'),
-        ('\u{76}', '\u{56}'), ('\u{77}', '\u{57}'), ('\u{78}', '\u{58}'),
-        ('\u{79}', '\u{59}'), ('\u{7a}', '\u{5a}'), ('\u{b5}', '\u{39c}'),
-        ('\u{b5}', '\u{3bc}'), ('\u{c0}', '\u{e0}'), ('\u{c1}', '\u{e1}'),
-        ('\u{c2}', '\u{e2}'), ('\u{c3}', '\u{e3}'), ('\u{c4}', '\u{e4}'),
-        ('\u{c5}', '\u{e5}'), ('\u{c5}', '\u{212b}'), ('\u{c6}', '\u{e6}'),
-        ('\u{c7}', '\u{e7}'), ('\u{c8}', '\u{e8}'), ('\u{c9}', '\u{e9}'),
-        ('\u{ca}', '\u{ea}'), ('\u{cb}', '\u{eb}'), ('\u{cc}', '\u{ec}'),
-        ('\u{cd}', '\u{ed}'), ('\u{ce}', '\u{ee}'), ('\u{cf}', '\u{ef}'),
-        ('\u{d0}', '\u{f0}'), ('\u{d1}', '\u{f1}'), ('\u{d2}', '\u{f2}'),
-        ('\u{d3}', '\u{f3}'), ('\u{d4}', '\u{f4}'), ('\u{d5}', '\u{f5}'),
-        ('\u{d6}', '\u{f6}'), ('\u{d8}', '\u{f8}'), ('\u{d9}', '\u{f9}'),
-        ('\u{da}', '\u{fa}'), ('\u{db}', '\u{fb}'), ('\u{dc}', '\u{fc}'),
-        ('\u{dd}', '\u{fd}'), ('\u{de}', '\u{fe}'), ('\u{df}', '\u{1e9e}'),
-        ('\u{e0}', '\u{c0}'), ('\u{e1}', '\u{c1}'), ('\u{e2}', '\u{c2}'),
-        ('\u{e3}', '\u{c3}'), ('\u{e4}', '\u{c4}'), ('\u{e5}', '\u{c5}'),
-        ('\u{e5}', '\u{212b}'), ('\u{e6}', '\u{c6}'), ('\u{e7}', '\u{c7}'),
-        ('\u{e8}', '\u{c8}'), ('\u{e9}', '\u{c9}'), ('\u{ea}', '\u{ca}'),
-        ('\u{eb}', '\u{cb}'), ('\u{ec}', '\u{cc}'), ('\u{ed}', '\u{cd}'),
-        ('\u{ee}', '\u{ce}'), ('\u{ef}', '\u{cf}'), ('\u{f0}', '\u{d0}'),
-        ('\u{f1}', '\u{d1}'), ('\u{f2}', '\u{d2}'), ('\u{f3}', '\u{d3}'),
-        ('\u{f4}', '\u{d4}'), ('\u{f5}', '\u{d5}'), ('\u{f6}', '\u{d6}'),
-        ('\u{f8}', '\u{d8}'), ('\u{f9}', '\u{d9}'), ('\u{fa}', '\u{da}'),
-        ('\u{fb}', '\u{db}'), ('\u{fc}', '\u{dc}'), ('\u{fd}', '\u{dd}'),
-        ('\u{fe}', '\u{de}'), ('\u{ff}', '\u{178}'), ('\u{100}', '\u{101}'),
-        ('\u{101}', '\u{100}'), ('\u{102}', '\u{103}'), ('\u{103}', '\u{102}'),
-        ('\u{104}', '\u{105}'), ('\u{105}', '\u{104}'), ('\u{106}', '\u{107}'),
-        ('\u{107}', '\u{106}'), ('\u{108}', '\u{109}'), ('\u{109}', '\u{108}'),
-        ('\u{10a}', '\u{10b}'), ('\u{10b}', '\u{10a}'), ('\u{10c}', '\u{10d}'),
-        ('\u{10d}', '\u{10c}'), ('\u{10e}', '\u{10f}'), ('\u{10f}', '\u{10e}'),
-        ('\u{110}', '\u{111}'), ('\u{111}', '\u{110}'), ('\u{112}', '\u{113}'),
-        ('\u{113}', '\u{112}'), ('\u{114}', '\u{115}'), ('\u{115}', '\u{114}'),
-        ('\u{116}', '\u{117}'), ('\u{117}', '\u{116}'), ('\u{118}', '\u{119}'),
-        ('\u{119}', '\u{118}'), ('\u{11a}', '\u{11b}'), ('\u{11b}', '\u{11a}'),
-        ('\u{11c}', '\u{11d}'), ('\u{11d}', '\u{11c}'), ('\u{11e}', '\u{11f}'),
-        ('\u{11f}', '\u{11e}'), ('\u{120}', '\u{121}'), ('\u{121}', '\u{120}'),
-        ('\u{122}', '\u{123}'), ('\u{123}', '\u{122}'), ('\u{124}', '\u{125}'),
-        ('\u{125}', '\u{124}'), ('\u{126}', '\u{127}'), ('\u{127}', '\u{126}'),
-        ('\u{128}', '\u{129}'), ('\u{129}', '\u{128}'), ('\u{12a}', '\u{12b}'),
-        ('\u{12b}', '\u{12a}'), ('\u{12c}', '\u{12d}'), ('\u{12d}', '\u{12c}'),
-        ('\u{12e}', '\u{12f}'), ('\u{12f}', '\u{12e}'), ('\u{132}', '\u{133}'),
-        ('\u{133}', '\u{132}'), ('\u{134}', '\u{135}'), ('\u{135}', '\u{134}'),
-        ('\u{136}', '\u{137}'), ('\u{137}', '\u{136}'), ('\u{139}', '\u{13a}'),
-        ('\u{13a}', '\u{139}'), ('\u{13b}', '\u{13c}'), ('\u{13c}', '\u{13b}'),
-        ('\u{13d}', '\u{13e}'), ('\u{13e}', '\u{13d}'), ('\u{13f}', '\u{140}'),
-        ('\u{140}', '\u{13f}'), ('\u{141}', '\u{142}'), ('\u{142}', '\u{141}'),
-        ('\u{143}', '\u{144}'), ('\u{144}', '\u{143}'), ('\u{145}', '\u{146}'),
-        ('\u{146}', '\u{145}'), ('\u{147}', '\u{148}'), ('\u{148}', '\u{147}'),
-        ('\u{14a}', '\u{14b}'), ('\u{14b}', '\u{14a}'), ('\u{14c}', '\u{14d}'),
-        ('\u{14d}', '\u{14c}'), ('\u{14e}', '\u{14f}'), ('\u{14f}', '\u{14e}'),
-        ('\u{150}', '\u{151}'), ('\u{151}', '\u{150}'), ('\u{152}', '\u{153}'),
-        ('\u{153}', '\u{152}'), ('\u{154}', '\u{155}'), ('\u{155}', '\u{154}'),
-        ('\u{156}', '\u{157}'), ('\u{157}', '\u{156}'), ('\u{158}', '\u{159}'),
-        ('\u{159}', '\u{158}'), ('\u{15a}', '\u{15b}'), ('\u{15b}', '\u{15a}'),
-        ('\u{15c}', '\u{15d}'), ('\u{15d}', '\u{15c}'), ('\u{15e}', '\u{15f}'),
-        ('\u{15f}', '\u{15e}'), ('\u{160}', '\u{161}'), ('\u{161}', '\u{160}'),
-        ('\u{162}', '\u{163}'), ('\u{163}', '\u{162}'), ('\u{164}', '\u{165}'),
-        ('\u{165}', '\u{164}'), ('\u{166}', '\u{167}'), ('\u{167}', '\u{166}'),
-        ('\u{168}', '\u{169}'), ('\u{169}', '\u{168}'), ('\u{16a}', '\u{16b}'),
-        ('\u{16b}', '\u{16a}'), ('\u{16c}', '\u{16d}'), ('\u{16d}', '\u{16c}'),
-        ('\u{16e}', '\u{16f}'), ('\u{16f}', '\u{16e}'), ('\u{170}', '\u{171}'),
-        ('\u{171}', '\u{170}'), ('\u{172}', '\u{173}'), ('\u{173}', '\u{172}'),
-        ('\u{174}', '\u{175}'), ('\u{175}', '\u{174}'), ('\u{176}', '\u{177}'),
-        ('\u{177}', '\u{176}'), ('\u{178}', '\u{ff}'), ('\u{179}', '\u{17a}'),
-        ('\u{17a}', '\u{179}'), ('\u{17b}', '\u{17c}'), ('\u{17c}', '\u{17b}'),
-        ('\u{17d}', '\u{17e}'), ('\u{17e}', '\u{17d}'), ('\u{17f}', '\u{53}'),
-        ('\u{17f}', '\u{73}'), ('\u{180}', '\u{243}'), ('\u{181}', '\u{253}'),
-        ('\u{182}', '\u{183}'), ('\u{183}', '\u{182}'), ('\u{184}', '\u{185}'),
-        ('\u{185}', '\u{184}'), ('\u{186}', '\u{254}'), ('\u{187}', '\u{188}'),
-        ('\u{188}', '\u{187}'), ('\u{189}', '\u{256}'), ('\u{18a}', '\u{257}'),
-        ('\u{18b}', '\u{18c}'), ('\u{18c}', '\u{18b}'), ('\u{18e}', '\u{1dd}'),
-        ('\u{18f}', '\u{259}'), ('\u{190}', '\u{25b}'), ('\u{191}', '\u{192}'),
-        ('\u{192}', '\u{191}'), ('\u{193}', '\u{260}'), ('\u{194}', '\u{263}'),
-        ('\u{195}', '\u{1f6}'), ('\u{196}', '\u{269}'), ('\u{197}', '\u{268}'),
-        ('\u{198}', '\u{199}'), ('\u{199}', '\u{198}'), ('\u{19a}', '\u{23d}'),
-        ('\u{19c}', '\u{26f}'), ('\u{19d}', '\u{272}'), ('\u{19e}', '\u{220}'),
-        ('\u{19f}', '\u{275}'), ('\u{1a0}', '\u{1a1}'), ('\u{1a1}', '\u{1a0}'),
-        ('\u{1a2}', '\u{1a3}'), ('\u{1a3}', '\u{1a2}'), ('\u{1a4}', '\u{1a5}'),
-        ('\u{1a5}', '\u{1a4}'), ('\u{1a6}', '\u{280}'), ('\u{1a7}', '\u{1a8}'),
-        ('\u{1a8}', '\u{1a7}'), ('\u{1a9}', '\u{283}'), ('\u{1ac}', '\u{1ad}'),
-        ('\u{1ad}', '\u{1ac}'), ('\u{1ae}', '\u{288}'), ('\u{1af}', '\u{1b0}'),
-        ('\u{1b0}', '\u{1af}'), ('\u{1b1}', '\u{28a}'), ('\u{1b2}', '\u{28b}'),
-        ('\u{1b3}', '\u{1b4}'), ('\u{1b4}', '\u{1b3}'), ('\u{1b5}', '\u{1b6}'),
-        ('\u{1b6}', '\u{1b5}'), ('\u{1b7}', '\u{292}'), ('\u{1b8}', '\u{1b9}'),
-        ('\u{1b9}', '\u{1b8}'), ('\u{1bc}', '\u{1bd}'), ('\u{1bd}', '\u{1bc}'),
-        ('\u{1bf}', '\u{1f7}'), ('\u{1c4}', '\u{1c5}'), ('\u{1c4}', '\u{1c6}'),
-        ('\u{1c5}', '\u{1c4}'), ('\u{1c5}', '\u{1c6}'), ('\u{1c6}', '\u{1c4}'),
-        ('\u{1c6}', '\u{1c5}'), ('\u{1c7}', '\u{1c8}'), ('\u{1c7}', '\u{1c9}'),
-        ('\u{1c8}', '\u{1c7}'), ('\u{1c8}', '\u{1c9}'), ('\u{1c9}', '\u{1c7}'),
-        ('\u{1c9}', '\u{1c8}'), ('\u{1ca}', '\u{1cb}'), ('\u{1ca}', '\u{1cc}'),
-        ('\u{1cb}', '\u{1ca}'), ('\u{1cb}', '\u{1cc}'), ('\u{1cc}', '\u{1ca}'),
-        ('\u{1cc}', '\u{1cb}'), ('\u{1cd}', '\u{1ce}'), ('\u{1ce}', '\u{1cd}'),
-        ('\u{1cf}', '\u{1d0}'), ('\u{1d0}', '\u{1cf}'), ('\u{1d1}', '\u{1d2}'),
-        ('\u{1d2}', '\u{1d1}'), ('\u{1d3}', '\u{1d4}'), ('\u{1d4}', '\u{1d3}'),
-        ('\u{1d5}', '\u{1d6}'), ('\u{1d6}', '\u{1d5}'), ('\u{1d7}', '\u{1d8}'),
-        ('\u{1d8}', '\u{1d7}'), ('\u{1d9}', '\u{1da}'), ('\u{1da}', '\u{1d9}'),
-        ('\u{1db}', '\u{1dc}'), ('\u{1dc}', '\u{1db}'), ('\u{1dd}', '\u{18e}'),
-        ('\u{1de}', '\u{1df}'), ('\u{1df}', '\u{1de}'), ('\u{1e0}', '\u{1e1}'),
-        ('\u{1e1}', '\u{1e0}'), ('\u{1e2}', '\u{1e3}'), ('\u{1e3}', '\u{1e2}'),
-        ('\u{1e4}', '\u{1e5}'), ('\u{1e5}', '\u{1e4}'), ('\u{1e6}', '\u{1e7}'),
-        ('\u{1e7}', '\u{1e6}'), ('\u{1e8}', '\u{1e9}'), ('\u{1e9}', '\u{1e8}'),
-        ('\u{1ea}', '\u{1eb}'), ('\u{1eb}', '\u{1ea}'), ('\u{1ec}', '\u{1ed}'),
-        ('\u{1ed}', '\u{1ec}'), ('\u{1ee}', '\u{1ef}'), ('\u{1ef}', '\u{1ee}'),
-        ('\u{1f1}', '\u{1f2}'), ('\u{1f1}', '\u{1f3}'), ('\u{1f2}', '\u{1f1}'),
-        ('\u{1f2}', '\u{1f3}'), ('\u{1f3}', '\u{1f1}'), ('\u{1f3}', '\u{1f2}'),
-        ('\u{1f4}', '\u{1f5}'), ('\u{1f5}', '\u{1f4}'), ('\u{1f6}', '\u{195}'),
-        ('\u{1f7}', '\u{1bf}'), ('\u{1f8}', '\u{1f9}'), ('\u{1f9}', '\u{1f8}'),
-        ('\u{1fa}', '\u{1fb}'), ('\u{1fb}', '\u{1fa}'), ('\u{1fc}', '\u{1fd}'),
-        ('\u{1fd}', '\u{1fc}'), ('\u{1fe}', '\u{1ff}'), ('\u{1ff}', '\u{1fe}'),
-        ('\u{200}', '\u{201}'), ('\u{201}', '\u{200}'), ('\u{202}', '\u{203}'),
-        ('\u{203}', '\u{202}'), ('\u{204}', '\u{205}'), ('\u{205}', '\u{204}'),
-        ('\u{206}', '\u{207}'), ('\u{207}', '\u{206}'), ('\u{208}', '\u{209}'),
-        ('\u{209}', '\u{208}'), ('\u{20a}', '\u{20b}'), ('\u{20b}', '\u{20a}'),
-        ('\u{20c}', '\u{20d}'), ('\u{20d}', '\u{20c}'), ('\u{20e}', '\u{20f}'),
-        ('\u{20f}', '\u{20e}'), ('\u{210}', '\u{211}'), ('\u{211}', '\u{210}'),
-        ('\u{212}', '\u{213}'), ('\u{213}', '\u{212}'), ('\u{214}', '\u{215}'),
-        ('\u{215}', '\u{214}'), ('\u{216}', '\u{217}'), ('\u{217}', '\u{216}'),
-        ('\u{218}', '\u{219}'), ('\u{219}', '\u{218}'), ('\u{21a}', '\u{21b}'),
-        ('\u{21b}', '\u{21a}'), ('\u{21c}', '\u{21d}'), ('\u{21d}', '\u{21c}'),
-        ('\u{21e}', '\u{21f}'), ('\u{21f}', '\u{21e}'), ('\u{220}', '\u{19e}'),
-        ('\u{222}', '\u{223}'), ('\u{223}', '\u{222}'), ('\u{224}', '\u{225}'),
-        ('\u{225}', '\u{224}'), ('\u{226}', '\u{227}'), ('\u{227}', '\u{226}'),
-        ('\u{228}', '\u{229}'), ('\u{229}', '\u{228}'), ('\u{22a}', '\u{22b}'),
-        ('\u{22b}', '\u{22a}'), ('\u{22c}', '\u{22d}'), ('\u{22d}', '\u{22c}'),
-        ('\u{22e}', '\u{22f}'), ('\u{22f}', '\u{22e}'), ('\u{230}', '\u{231}'),
-        ('\u{231}', '\u{230}'), ('\u{232}', '\u{233}'), ('\u{233}', '\u{232}'),
-        ('\u{23a}', '\u{2c65}'), ('\u{23b}', '\u{23c}'), ('\u{23c}', '\u{23b}'),
-        ('\u{23d}', '\u{19a}'), ('\u{23e}', '\u{2c66}'), ('\u{23f}',
-        '\u{2c7e}'), ('\u{240}', '\u{2c7f}'), ('\u{241}', '\u{242}'),
-        ('\u{242}', '\u{241}'), ('\u{243}', '\u{180}'), ('\u{244}', '\u{289}'),
-        ('\u{245}', '\u{28c}'), ('\u{246}', '\u{247}'), ('\u{247}', '\u{246}'),
-        ('\u{248}', '\u{249}'), ('\u{249}', '\u{248}'), ('\u{24a}', '\u{24b}'),
-        ('\u{24b}', '\u{24a}'), ('\u{24c}', '\u{24d}'), ('\u{24d}', '\u{24c}'),
-        ('\u{24e}', '\u{24f}'), ('\u{24f}', '\u{24e}'), ('\u{250}', '\u{2c6f}'),
-        ('\u{251}', '\u{2c6d}'), ('\u{252}', '\u{2c70}'), ('\u{253}',
-        '\u{181}'), ('\u{254}', '\u{186}'), ('\u{256}', '\u{189}'), ('\u{257}',
-        '\u{18a}'), ('\u{259}', '\u{18f}'), ('\u{25b}', '\u{190}'), ('\u{25c}',
-        '\u{a7ab}'), ('\u{260}', '\u{193}'), ('\u{261}', '\u{a7ac}'),
-        ('\u{263}', '\u{194}'), ('\u{265}', '\u{a78d}'), ('\u{266}',
-        '\u{a7aa}'), ('\u{268}', '\u{197}'), ('\u{269}', '\u{196}'), ('\u{26a}',
-        '\u{a7ae}'), ('\u{26b}', '\u{2c62}'), ('\u{26c}', '\u{a7ad}'),
-        ('\u{26f}', '\u{19c}'), ('\u{271}', '\u{2c6e}'), ('\u{272}', '\u{19d}'),
-        ('\u{275}', '\u{19f}'), ('\u{27d}', '\u{2c64}'), ('\u{280}', '\u{1a6}'),
-        ('\u{283}', '\u{1a9}'), ('\u{287}', '\u{a7b1}'), ('\u{288}', '\u{1ae}'),
-        ('\u{289}', '\u{244}'), ('\u{28a}', '\u{1b1}'), ('\u{28b}', '\u{1b2}'),
-        ('\u{28c}', '\u{245}'), ('\u{292}', '\u{1b7}'), ('\u{29d}', '\u{a7b2}'),
-        ('\u{29e}', '\u{a7b0}'), ('\u{345}', '\u{399}'), ('\u{345}', '\u{3b9}'),
-        ('\u{345}', '\u{1fbe}'), ('\u{370}', '\u{371}'), ('\u{371}', '\u{370}'),
-        ('\u{372}', '\u{373}'), ('\u{373}', '\u{372}'), ('\u{376}', '\u{377}'),
-        ('\u{377}', '\u{376}'), ('\u{37b}', '\u{3fd}'), ('\u{37c}', '\u{3fe}'),
-        ('\u{37d}', '\u{3ff}'), ('\u{37f}', '\u{3f3}'), ('\u{386}', '\u{3ac}'),
-        ('\u{388}', '\u{3ad}'), ('\u{389}', '\u{3ae}'), ('\u{38a}', '\u{3af}'),
-        ('\u{38c}', '\u{3cc}'), ('\u{38e}', '\u{3cd}'), ('\u{38f}', '\u{3ce}'),
-        ('\u{391}', '\u{3b1}'), ('\u{392}', '\u{3b2}'), ('\u{392}', '\u{3d0}'),
-        ('\u{393}', '\u{3b3}'), ('\u{394}', '\u{3b4}'), ('\u{395}', '\u{3b5}'),
-        ('\u{395}', '\u{3f5}'), ('\u{396}', '\u{3b6}'), ('\u{397}', '\u{3b7}'),
-        ('\u{398}', '\u{3b8}'), ('\u{398}', '\u{3d1}'), ('\u{398}', '\u{3f4}'),
-        ('\u{399}', '\u{345}'), ('\u{399}', '\u{3b9}'), ('\u{399}', '\u{1fbe}'),
-        ('\u{39a}', '\u{3ba}'), ('\u{39a}', '\u{3f0}'), ('\u{39b}', '\u{3bb}'),
-        ('\u{39c}', '\u{b5}'), ('\u{39c}', '\u{3bc}'), ('\u{39d}', '\u{3bd}'),
-        ('\u{39e}', '\u{3be}'), ('\u{39f}', '\u{3bf}'), ('\u{3a0}', '\u{3c0}'),
-        ('\u{3a0}', '\u{3d6}'), ('\u{3a1}', '\u{3c1}'), ('\u{3a1}', '\u{3f1}'),
-        ('\u{3a3}', '\u{3c2}'), ('\u{3a3}', '\u{3c3}'), ('\u{3a4}', '\u{3c4}'),
-        ('\u{3a5}', '\u{3c5}'), ('\u{3a6}', '\u{3c6}'), ('\u{3a6}', '\u{3d5}'),
-        ('\u{3a7}', '\u{3c7}'), ('\u{3a8}', '\u{3c8}'), ('\u{3a9}', '\u{3c9}'),
-        ('\u{3a9}', '\u{2126}'), ('\u{3aa}', '\u{3ca}'), ('\u{3ab}', '\u{3cb}'),
-        ('\u{3ac}', '\u{386}'), ('\u{3ad}', '\u{388}'), ('\u{3ae}', '\u{389}'),
-        ('\u{3af}', '\u{38a}'), ('\u{3b1}', '\u{391}'), ('\u{3b2}', '\u{392}'),
-        ('\u{3b2}', '\u{3d0}'), ('\u{3b3}', '\u{393}'), ('\u{3b4}', '\u{394}'),
-        ('\u{3b5}', '\u{395}'), ('\u{3b5}', '\u{3f5}'), ('\u{3b6}', '\u{396}'),
-        ('\u{3b7}', '\u{397}'), ('\u{3b8}', '\u{398}'), ('\u{3b8}', '\u{3d1}'),
-        ('\u{3b8}', '\u{3f4}'), ('\u{3b9}', '\u{345}'), ('\u{3b9}', '\u{399}'),
-        ('\u{3b9}', '\u{1fbe}'), ('\u{3ba}', '\u{39a}'), ('\u{3ba}', '\u{3f0}'),
-        ('\u{3bb}', '\u{39b}'), ('\u{3bc}', '\u{b5}'), ('\u{3bc}', '\u{39c}'),
-        ('\u{3bd}', '\u{39d}'), ('\u{3be}', '\u{39e}'), ('\u{3bf}', '\u{39f}'),
-        ('\u{3c0}', '\u{3a0}'), ('\u{3c0}', '\u{3d6}'), ('\u{3c1}', '\u{3a1}'),
-        ('\u{3c1}', '\u{3f1}'), ('\u{3c2}', '\u{3a3}'), ('\u{3c2}', '\u{3c3}'),
-        ('\u{3c3}', '\u{3a3}'), ('\u{3c3}', '\u{3c2}'), ('\u{3c4}', '\u{3a4}'),
-        ('\u{3c5}', '\u{3a5}'), ('\u{3c6}', '\u{3a6}'), ('\u{3c6}', '\u{3d5}'),
-        ('\u{3c7}', '\u{3a7}'), ('\u{3c8}', '\u{3a8}'), ('\u{3c9}', '\u{3a9}'),
-        ('\u{3c9}', '\u{2126}'), ('\u{3ca}', '\u{3aa}'), ('\u{3cb}', '\u{3ab}'),
-        ('\u{3cc}', '\u{38c}'), ('\u{3cd}', '\u{38e}'), ('\u{3ce}', '\u{38f}'),
-        ('\u{3cf}', '\u{3d7}'), ('\u{3d0}', '\u{392}'), ('\u{3d0}', '\u{3b2}'),
-        ('\u{3d1}', '\u{398}'), ('\u{3d1}', '\u{3b8}'), ('\u{3d1}', '\u{3f4}'),
-        ('\u{3d5}', '\u{3a6}'), ('\u{3d5}', '\u{3c6}'), ('\u{3d6}', '\u{3a0}'),
-        ('\u{3d6}', '\u{3c0}'), ('\u{3d7}', '\u{3cf}'), ('\u{3d8}', '\u{3d9}'),
-        ('\u{3d9}', '\u{3d8}'), ('\u{3da}', '\u{3db}'), ('\u{3db}', '\u{3da}'),
-        ('\u{3dc}', '\u{3dd}'), ('\u{3dd}', '\u{3dc}'), ('\u{3de}', '\u{3df}'),
-        ('\u{3df}', '\u{3de}'), ('\u{3e0}', '\u{3e1}'), ('\u{3e1}', '\u{3e0}'),
-        ('\u{3e2}', '\u{3e3}'), ('\u{3e3}', '\u{3e2}'), ('\u{3e4}', '\u{3e5}'),
-        ('\u{3e5}', '\u{3e4}'), ('\u{3e6}', '\u{3e7}'), ('\u{3e7}', '\u{3e6}'),
-        ('\u{3e8}', '\u{3e9}'), ('\u{3e9}', '\u{3e8}'), ('\u{3ea}', '\u{3eb}'),
-        ('\u{3eb}', '\u{3ea}'), ('\u{3ec}', '\u{3ed}'), ('\u{3ed}', '\u{3ec}'),
-        ('\u{3ee}', '\u{3ef}'), ('\u{3ef}', '\u{3ee}'), ('\u{3f0}', '\u{39a}'),
-        ('\u{3f0}', '\u{3ba}'), ('\u{3f1}', '\u{3a1}'), ('\u{3f1}', '\u{3c1}'),
-        ('\u{3f2}', '\u{3f9}'), ('\u{3f3}', '\u{37f}'), ('\u{3f4}', '\u{398}'),
-        ('\u{3f4}', '\u{3b8}'), ('\u{3f4}', '\u{3d1}'), ('\u{3f5}', '\u{395}'),
-        ('\u{3f5}', '\u{3b5}'), ('\u{3f7}', '\u{3f8}'), ('\u{3f8}', '\u{3f7}'),
-        ('\u{3f9}', '\u{3f2}'), ('\u{3fa}', '\u{3fb}'), ('\u{3fb}', '\u{3fa}'),
-        ('\u{3fd}', '\u{37b}'), ('\u{3fe}', '\u{37c}'), ('\u{3ff}', '\u{37d}'),
-        ('\u{400}', '\u{450}'), ('\u{401}', '\u{451}'), ('\u{402}', '\u{452}'),
-        ('\u{403}', '\u{453}'), ('\u{404}', '\u{454}'), ('\u{405}', '\u{455}'),
-        ('\u{406}', '\u{456}'), ('\u{407}', '\u{457}'), ('\u{408}', '\u{458}'),
-        ('\u{409}', '\u{459}'), ('\u{40a}', '\u{45a}'), ('\u{40b}', '\u{45b}'),
-        ('\u{40c}', '\u{45c}'), ('\u{40d}', '\u{45d}'), ('\u{40e}', '\u{45e}'),
-        ('\u{40f}', '\u{45f}'), ('\u{410}', '\u{430}'), ('\u{411}', '\u{431}'),
-        ('\u{412}', '\u{432}'), ('\u{412}', '\u{1c80}'), ('\u{413}', '\u{433}'),
-        ('\u{414}', '\u{434}'), ('\u{414}', '\u{1c81}'), ('\u{415}', '\u{435}'),
-        ('\u{416}', '\u{436}'), ('\u{417}', '\u{437}'), ('\u{418}', '\u{438}'),
-        ('\u{419}', '\u{439}'), ('\u{41a}', '\u{43a}'), ('\u{41b}', '\u{43b}'),
-        ('\u{41c}', '\u{43c}'), ('\u{41d}', '\u{43d}'), ('\u{41e}', '\u{43e}'),
-        ('\u{41e}', '\u{1c82}'), ('\u{41f}', '\u{43f}'), ('\u{420}', '\u{440}'),
-        ('\u{421}', '\u{441}'), ('\u{421}', '\u{1c83}'), ('\u{422}', '\u{442}'),
-        ('\u{422}', '\u{1c84}'), ('\u{422}', '\u{1c85}'), ('\u{423}',
-        '\u{443}'), ('\u{424}', '\u{444}'), ('\u{425}', '\u{445}'), ('\u{426}',
-        '\u{446}'), ('\u{427}', '\u{447}'), ('\u{428}', '\u{448}'), ('\u{429}',
-        '\u{449}'), ('\u{42a}', '\u{44a}'), ('\u{42a}', '\u{1c86}'), ('\u{42b}',
-        '\u{44b}'), ('\u{42c}', '\u{44c}'), ('\u{42d}', '\u{44d}'), ('\u{42e}',
-        '\u{44e}'), ('\u{42f}', '\u{44f}'), ('\u{430}', '\u{410}'), ('\u{431}',
-        '\u{411}'), ('\u{432}', '\u{412}'), ('\u{432}', '\u{1c80}'), ('\u{433}',
-        '\u{413}'), ('\u{434}', '\u{414}'), ('\u{434}', '\u{1c81}'), ('\u{435}',
-        '\u{415}'), ('\u{436}', '\u{416}'), ('\u{437}', '\u{417}'), ('\u{438}',
-        '\u{418}'), ('\u{439}', '\u{419}'), ('\u{43a}', '\u{41a}'), ('\u{43b}',
-        '\u{41b}'), ('\u{43c}', '\u{41c}'), ('\u{43d}', '\u{41d}'), ('\u{43e}',
-        '\u{41e}'), ('\u{43e}', '\u{1c82}'), ('\u{43f}', '\u{41f}'), ('\u{440}',
-        '\u{420}'), ('\u{441}', '\u{421}'), ('\u{441}', '\u{1c83}'), ('\u{442}',
-        '\u{422}'), ('\u{442}', '\u{1c84}'), ('\u{442}', '\u{1c85}'),
-        ('\u{443}', '\u{423}'), ('\u{444}', '\u{424}'), ('\u{445}', '\u{425}'),
-        ('\u{446}', '\u{426}'), ('\u{447}', '\u{427}'), ('\u{448}', '\u{428}'),
-        ('\u{449}', '\u{429}'), ('\u{44a}', '\u{42a}'), ('\u{44a}', '\u{1c86}'),
-        ('\u{44b}', '\u{42b}'), ('\u{44c}', '\u{42c}'), ('\u{44d}', '\u{42d}'),
-        ('\u{44e}', '\u{42e}'), ('\u{44f}', '\u{42f}'), ('\u{450}', '\u{400}'),
-        ('\u{451}', '\u{401}'), ('\u{452}', '\u{402}'), ('\u{453}', '\u{403}'),
-        ('\u{454}', '\u{404}'), ('\u{455}', '\u{405}'), ('\u{456}', '\u{406}'),
-        ('\u{457}', '\u{407}'), ('\u{458}', '\u{408}'), ('\u{459}', '\u{409}'),
-        ('\u{45a}', '\u{40a}'), ('\u{45b}', '\u{40b}'), ('\u{45c}', '\u{40c}'),
-        ('\u{45d}', '\u{40d}'), ('\u{45e}', '\u{40e}'), ('\u{45f}', '\u{40f}'),
-        ('\u{460}', '\u{461}'), ('\u{461}', '\u{460}'), ('\u{462}', '\u{463}'),
-        ('\u{462}', '\u{1c87}'), ('\u{463}', '\u{462}'), ('\u{463}',
-        '\u{1c87}'), ('\u{464}', '\u{465}'), ('\u{465}', '\u{464}'), ('\u{466}',
-        '\u{467}'), ('\u{467}', '\u{466}'), ('\u{468}', '\u{469}'), ('\u{469}',
-        '\u{468}'), ('\u{46a}', '\u{46b}'), ('\u{46b}', '\u{46a}'), ('\u{46c}',
-        '\u{46d}'), ('\u{46d}', '\u{46c}'), ('\u{46e}', '\u{46f}'), ('\u{46f}',
-        '\u{46e}'), ('\u{470}', '\u{471}'), ('\u{471}', '\u{470}'), ('\u{472}',
-        '\u{473}'), ('\u{473}', '\u{472}'), ('\u{474}', '\u{475}'), ('\u{475}',
-        '\u{474}'), ('\u{476}', '\u{477}'), ('\u{477}', '\u{476}'), ('\u{478}',
-        '\u{479}'), ('\u{479}', '\u{478}'), ('\u{47a}', '\u{47b}'), ('\u{47b}',
-        '\u{47a}'), ('\u{47c}', '\u{47d}'), ('\u{47d}', '\u{47c}'), ('\u{47e}',
-        '\u{47f}'), ('\u{47f}', '\u{47e}'), ('\u{480}', '\u{481}'), ('\u{481}',
-        '\u{480}'), ('\u{48a}', '\u{48b}'), ('\u{48b}', '\u{48a}'), ('\u{48c}',
-        '\u{48d}'), ('\u{48d}', '\u{48c}'), ('\u{48e}', '\u{48f}'), ('\u{48f}',
-        '\u{48e}'), ('\u{490}', '\u{491}'), ('\u{491}', '\u{490}'), ('\u{492}',
-        '\u{493}'), ('\u{493}', '\u{492}'), ('\u{494}', '\u{495}'), ('\u{495}',
-        '\u{494}'), ('\u{496}', '\u{497}'), ('\u{497}', '\u{496}'), ('\u{498}',
-        '\u{499}'), ('\u{499}', '\u{498}'), ('\u{49a}', '\u{49b}'), ('\u{49b}',
-        '\u{49a}'), ('\u{49c}', '\u{49d}'), ('\u{49d}', '\u{49c}'), ('\u{49e}',
-        '\u{49f}'), ('\u{49f}', '\u{49e}'), ('\u{4a0}', '\u{4a1}'), ('\u{4a1}',
-        '\u{4a0}'), ('\u{4a2}', '\u{4a3}'), ('\u{4a3}', '\u{4a2}'), ('\u{4a4}',
-        '\u{4a5}'), ('\u{4a5}', '\u{4a4}'), ('\u{4a6}', '\u{4a7}'), ('\u{4a7}',
-        '\u{4a6}'), ('\u{4a8}', '\u{4a9}'), ('\u{4a9}', '\u{4a8}'), ('\u{4aa}',
-        '\u{4ab}'), ('\u{4ab}', '\u{4aa}'), ('\u{4ac}', '\u{4ad}'), ('\u{4ad}',
-        '\u{4ac}'), ('\u{4ae}', '\u{4af}'), ('\u{4af}', '\u{4ae}'), ('\u{4b0}',
-        '\u{4b1}'), ('\u{4b1}', '\u{4b0}'), ('\u{4b2}', '\u{4b3}'), ('\u{4b3}',
-        '\u{4b2}'), ('\u{4b4}', '\u{4b5}'), ('\u{4b5}', '\u{4b4}'), ('\u{4b6}',
-        '\u{4b7}'), ('\u{4b7}', '\u{4b6}'), ('\u{4b8}', '\u{4b9}'), ('\u{4b9}',
-        '\u{4b8}'), ('\u{4ba}', '\u{4bb}'), ('\u{4bb}', '\u{4ba}'), ('\u{4bc}',
-        '\u{4bd}'), ('\u{4bd}', '\u{4bc}'), ('\u{4be}', '\u{4bf}'), ('\u{4bf}',
-        '\u{4be}'), ('\u{4c0}', '\u{4cf}'), ('\u{4c1}', '\u{4c2}'), ('\u{4c2}',
-        '\u{4c1}'), ('\u{4c3}', '\u{4c4}'), ('\u{4c4}', '\u{4c3}'), ('\u{4c5}',
-        '\u{4c6}'), ('\u{4c6}', '\u{4c5}'), ('\u{4c7}', '\u{4c8}'), ('\u{4c8}',
-        '\u{4c7}'), ('\u{4c9}', '\u{4ca}'), ('\u{4ca}', '\u{4c9}'), ('\u{4cb}',
-        '\u{4cc}'), ('\u{4cc}', '\u{4cb}'), ('\u{4cd}', '\u{4ce}'), ('\u{4ce}',
-        '\u{4cd}'), ('\u{4cf}', '\u{4c0}'), ('\u{4d0}', '\u{4d1}'), ('\u{4d1}',
-        '\u{4d0}'), ('\u{4d2}', '\u{4d3}'), ('\u{4d3}', '\u{4d2}'), ('\u{4d4}',
-        '\u{4d5}'), ('\u{4d5}', '\u{4d4}'), ('\u{4d6}', '\u{4d7}'), ('\u{4d7}',
-        '\u{4d6}'), ('\u{4d8}', '\u{4d9}'), ('\u{4d9}', '\u{4d8}'), ('\u{4da}',
-        '\u{4db}'), ('\u{4db}', '\u{4da}'), ('\u{4dc}', '\u{4dd}'), ('\u{4dd}',
-        '\u{4dc}'), ('\u{4de}', '\u{4df}'), ('\u{4df}', '\u{4de}'), ('\u{4e0}',
-        '\u{4e1}'), ('\u{4e1}', '\u{4e0}'), ('\u{4e2}', '\u{4e3}'), ('\u{4e3}',
-        '\u{4e2}'), ('\u{4e4}', '\u{4e5}'), ('\u{4e5}', '\u{4e4}'), ('\u{4e6}',
-        '\u{4e7}'), ('\u{4e7}', '\u{4e6}'), ('\u{4e8}', '\u{4e9}'), ('\u{4e9}',
-        '\u{4e8}'), ('\u{4ea}', '\u{4eb}'), ('\u{4eb}', '\u{4ea}'), ('\u{4ec}',
-        '\u{4ed}'), ('\u{4ed}', '\u{4ec}'), ('\u{4ee}', '\u{4ef}'), ('\u{4ef}',
-        '\u{4ee}'), ('\u{4f0}', '\u{4f1}'), ('\u{4f1}', '\u{4f0}'), ('\u{4f2}',
-        '\u{4f3}'), ('\u{4f3}', '\u{4f2}'), ('\u{4f4}', '\u{4f5}'), ('\u{4f5}',
-        '\u{4f4}'), ('\u{4f6}', '\u{4f7}'), ('\u{4f7}', '\u{4f6}'), ('\u{4f8}',
-        '\u{4f9}'), ('\u{4f9}', '\u{4f8}'), ('\u{4fa}', '\u{4fb}'), ('\u{4fb}',
-        '\u{4fa}'), ('\u{4fc}', '\u{4fd}'), ('\u{4fd}', '\u{4fc}'), ('\u{4fe}',
-        '\u{4ff}'), ('\u{4ff}', '\u{4fe}'), ('\u{500}', '\u{501}'), ('\u{501}',
-        '\u{500}'), ('\u{502}', '\u{503}'), ('\u{503}', '\u{502}'), ('\u{504}',
-        '\u{505}'), ('\u{505}', '\u{504}'), ('\u{506}', '\u{507}'), ('\u{507}',
-        '\u{506}'), ('\u{508}', '\u{509}'), ('\u{509}', '\u{508}'), ('\u{50a}',
-        '\u{50b}'), ('\u{50b}', '\u{50a}'), ('\u{50c}', '\u{50d}'), ('\u{50d}',
-        '\u{50c}'), ('\u{50e}', '\u{50f}'), ('\u{50f}', '\u{50e}'), ('\u{510}',
-        '\u{511}'), ('\u{511}', '\u{510}'), ('\u{512}', '\u{513}'), ('\u{513}',
-        '\u{512}'), ('\u{514}', '\u{515}'), ('\u{515}', '\u{514}'), ('\u{516}',
-        '\u{517}'), ('\u{517}', '\u{516}'), ('\u{518}', '\u{519}'), ('\u{519}',
-        '\u{518}'), ('\u{51a}', '\u{51b}'), ('\u{51b}', '\u{51a}'), ('\u{51c}',
-        '\u{51d}'), ('\u{51d}', '\u{51c}'), ('\u{51e}', '\u{51f}'), ('\u{51f}',
-        '\u{51e}'), ('\u{520}', '\u{521}'), ('\u{521}', '\u{520}'), ('\u{522}',
-        '\u{523}'), ('\u{523}', '\u{522}'), ('\u{524}', '\u{525}'), ('\u{525}',
-        '\u{524}'), ('\u{526}', '\u{527}'), ('\u{527}', '\u{526}'), ('\u{528}',
-        '\u{529}'), ('\u{529}', '\u{528}'), ('\u{52a}', '\u{52b}'), ('\u{52b}',
-        '\u{52a}'), ('\u{52c}', '\u{52d}'), ('\u{52d}', '\u{52c}'), ('\u{52e}',
-        '\u{52f}'), ('\u{52f}', '\u{52e}'), ('\u{531}', '\u{561}'), ('\u{532}',
-        '\u{562}'), ('\u{533}', '\u{563}'), ('\u{534}', '\u{564}'), ('\u{535}',
-        '\u{565}'), ('\u{536}', '\u{566}'), ('\u{537}', '\u{567}'), ('\u{538}',
-        '\u{568}'), ('\u{539}', '\u{569}'), ('\u{53a}', '\u{56a}'), ('\u{53b}',
-        '\u{56b}'), ('\u{53c}', '\u{56c}'), ('\u{53d}', '\u{56d}'), ('\u{53e}',
-        '\u{56e}'), ('\u{53f}', '\u{56f}'), ('\u{540}', '\u{570}'), ('\u{541}',
-        '\u{571}'), ('\u{542}', '\u{572}'), ('\u{543}', '\u{573}'), ('\u{544}',
-        '\u{574}'), ('\u{545}', '\u{575}'), ('\u{546}', '\u{576}'), ('\u{547}',
-        '\u{577}'), ('\u{548}', '\u{578}'), ('\u{549}', '\u{579}'), ('\u{54a}',
-        '\u{57a}'), ('\u{54b}', '\u{57b}'), ('\u{54c}', '\u{57c}'), ('\u{54d}',
-        '\u{57d}'), ('\u{54e}', '\u{57e}'), ('\u{54f}', '\u{57f}'), ('\u{550}',
-        '\u{580}'), ('\u{551}', '\u{581}'), ('\u{552}', '\u{582}'), ('\u{553}',
-        '\u{583}'), ('\u{554}', '\u{584}'), ('\u{555}', '\u{585}'), ('\u{556}',
-        '\u{586}'), ('\u{561}', '\u{531}'), ('\u{562}', '\u{532}'), ('\u{563}',
-        '\u{533}'), ('\u{564}', '\u{534}'), ('\u{565}', '\u{535}'), ('\u{566}',
-        '\u{536}'), ('\u{567}', '\u{537}'), ('\u{568}', '\u{538}'), ('\u{569}',
-        '\u{539}'), ('\u{56a}', '\u{53a}'), ('\u{56b}', '\u{53b}'), ('\u{56c}',
-        '\u{53c}'), ('\u{56d}', '\u{53d}'), ('\u{56e}', '\u{53e}'), ('\u{56f}',
-        '\u{53f}'), ('\u{570}', '\u{540}'), ('\u{571}', '\u{541}'), ('\u{572}',
-        '\u{542}'), ('\u{573}', '\u{543}'), ('\u{574}', '\u{544}'), ('\u{575}',
-        '\u{545}'), ('\u{576}', '\u{546}'), ('\u{577}', '\u{547}'), ('\u{578}',
-        '\u{548}'), ('\u{579}', '\u{549}'), ('\u{57a}', '\u{54a}'), ('\u{57b}',
-        '\u{54b}'), ('\u{57c}', '\u{54c}'), ('\u{57d}', '\u{54d}'), ('\u{57e}',
-        '\u{54e}'), ('\u{57f}', '\u{54f}'), ('\u{580}', '\u{550}'), ('\u{581}',
-        '\u{551}'), ('\u{582}', '\u{552}'), ('\u{583}', '\u{553}'), ('\u{584}',
-        '\u{554}'), ('\u{585}', '\u{555}'), ('\u{586}', '\u{556}'), ('\u{10a0}',
-        '\u{2d00}'), ('\u{10a1}', '\u{2d01}'), ('\u{10a2}', '\u{2d02}'),
-        ('\u{10a3}', '\u{2d03}'), ('\u{10a4}', '\u{2d04}'), ('\u{10a5}',
-        '\u{2d05}'), ('\u{10a6}', '\u{2d06}'), ('\u{10a7}', '\u{2d07}'),
-        ('\u{10a8}', '\u{2d08}'), ('\u{10a9}', '\u{2d09}'), ('\u{10aa}',
-        '\u{2d0a}'), ('\u{10ab}', '\u{2d0b}'), ('\u{10ac}', '\u{2d0c}'),
-        ('\u{10ad}', '\u{2d0d}'), ('\u{10ae}', '\u{2d0e}'), ('\u{10af}',
-        '\u{2d0f}'), ('\u{10b0}', '\u{2d10}'), ('\u{10b1}', '\u{2d11}'),
-        ('\u{10b2}', '\u{2d12}'), ('\u{10b3}', '\u{2d13}'), ('\u{10b4}',
-        '\u{2d14}'), ('\u{10b5}', '\u{2d15}'), ('\u{10b6}', '\u{2d16}'),
-        ('\u{10b7}', '\u{2d17}'), ('\u{10b8}', '\u{2d18}'), ('\u{10b9}',
-        '\u{2d19}'), ('\u{10ba}', '\u{2d1a}'), ('\u{10bb}', '\u{2d1b}'),
-        ('\u{10bc}', '\u{2d1c}'), ('\u{10bd}', '\u{2d1d}'), ('\u{10be}',
-        '\u{2d1e}'), ('\u{10bf}', '\u{2d1f}'), ('\u{10c0}', '\u{2d20}'),
-        ('\u{10c1}', '\u{2d21}'), ('\u{10c2}', '\u{2d22}'), ('\u{10c3}',
-        '\u{2d23}'), ('\u{10c4}', '\u{2d24}'), ('\u{10c5}', '\u{2d25}'),
-        ('\u{10c7}', '\u{2d27}'), ('\u{10cd}', '\u{2d2d}'), ('\u{13a0}',
-        '\u{ab70}'), ('\u{13a1}', '\u{ab71}'), ('\u{13a2}', '\u{ab72}'),
-        ('\u{13a3}', '\u{ab73}'), ('\u{13a4}', '\u{ab74}'), ('\u{13a5}',
-        '\u{ab75}'), ('\u{13a6}', '\u{ab76}'), ('\u{13a7}', '\u{ab77}'),
-        ('\u{13a8}', '\u{ab78}'), ('\u{13a9}', '\u{ab79}'), ('\u{13aa}',
-        '\u{ab7a}'), ('\u{13ab}', '\u{ab7b}'), ('\u{13ac}', '\u{ab7c}'),
-        ('\u{13ad}', '\u{ab7d}'), ('\u{13ae}', '\u{ab7e}'), ('\u{13af}',
-        '\u{ab7f}'), ('\u{13b0}', '\u{ab80}'), ('\u{13b1}', '\u{ab81}'),
-        ('\u{13b2}', '\u{ab82}'), ('\u{13b3}', '\u{ab83}'), ('\u{13b4}',
-        '\u{ab84}'), ('\u{13b5}', '\u{ab85}'), ('\u{13b6}', '\u{ab86}'),
-        ('\u{13b7}', '\u{ab87}'), ('\u{13b8}', '\u{ab88}'), ('\u{13b9}',
-        '\u{ab89}'), ('\u{13ba}', '\u{ab8a}'), ('\u{13bb}', '\u{ab8b}'),
-        ('\u{13bc}', '\u{ab8c}'), ('\u{13bd}', '\u{ab8d}'), ('\u{13be}',
-        '\u{ab8e}'), ('\u{13bf}', '\u{ab8f}'), ('\u{13c0}', '\u{ab90}'),
-        ('\u{13c1}', '\u{ab91}'), ('\u{13c2}', '\u{ab92}'), ('\u{13c3}',
-        '\u{ab93}'), ('\u{13c4}', '\u{ab94}'), ('\u{13c5}', '\u{ab95}'),
-        ('\u{13c6}', '\u{ab96}'), ('\u{13c7}', '\u{ab97}'), ('\u{13c8}',
-        '\u{ab98}'), ('\u{13c9}', '\u{ab99}'), ('\u{13ca}', '\u{ab9a}'),
-        ('\u{13cb}', '\u{ab9b}'), ('\u{13cc}', '\u{ab9c}'), ('\u{13cd}',
-        '\u{ab9d}'), ('\u{13ce}', '\u{ab9e}'), ('\u{13cf}', '\u{ab9f}'),
-        ('\u{13d0}', '\u{aba0}'), ('\u{13d1}', '\u{aba1}'), ('\u{13d2}',
-        '\u{aba2}'), ('\u{13d3}', '\u{aba3}'), ('\u{13d4}', '\u{aba4}'),
-        ('\u{13d5}', '\u{aba5}'), ('\u{13d6}', '\u{aba6}'), ('\u{13d7}',
-        '\u{aba7}'), ('\u{13d8}', '\u{aba8}'), ('\u{13d9}', '\u{aba9}'),
-        ('\u{13da}', '\u{abaa}'), ('\u{13db}', '\u{abab}'), ('\u{13dc}',
-        '\u{abac}'), ('\u{13dd}', '\u{abad}'), ('\u{13de}', '\u{abae}'),
-        ('\u{13df}', '\u{abaf}'), ('\u{13e0}', '\u{abb0}'), ('\u{13e1}',
-        '\u{abb1}'), ('\u{13e2}', '\u{abb2}'), ('\u{13e3}', '\u{abb3}'),
-        ('\u{13e4}', '\u{abb4}'), ('\u{13e5}', '\u{abb5}'), ('\u{13e6}',
-        '\u{abb6}'), ('\u{13e7}', '\u{abb7}'), ('\u{13e8}', '\u{abb8}'),
-        ('\u{13e9}', '\u{abb9}'), ('\u{13ea}', '\u{abba}'), ('\u{13eb}',
-        '\u{abbb}'), ('\u{13ec}', '\u{abbc}'), ('\u{13ed}', '\u{abbd}'),
-        ('\u{13ee}', '\u{abbe}'), ('\u{13ef}', '\u{abbf}'), ('\u{13f0}',
-        '\u{13f8}'), ('\u{13f1}', '\u{13f9}'), ('\u{13f2}', '\u{13fa}'),
-        ('\u{13f3}', '\u{13fb}'), ('\u{13f4}', '\u{13fc}'), ('\u{13f5}',
-        '\u{13fd}'), ('\u{13f8}', '\u{13f0}'), ('\u{13f9}', '\u{13f1}'),
-        ('\u{13fa}', '\u{13f2}'), ('\u{13fb}', '\u{13f3}'), ('\u{13fc}',
-        '\u{13f4}'), ('\u{13fd}', '\u{13f5}'), ('\u{1c80}', '\u{412}'),
-        ('\u{1c80}', '\u{432}'), ('\u{1c81}', '\u{414}'), ('\u{1c81}',
-        '\u{434}'), ('\u{1c82}', '\u{41e}'), ('\u{1c82}', '\u{43e}'),
-        ('\u{1c83}', '\u{421}'), ('\u{1c83}', '\u{441}'), ('\u{1c84}',
-        '\u{422}'), ('\u{1c84}', '\u{442}'), ('\u{1c84}', '\u{1c85}'),
-        ('\u{1c85}', '\u{422}'), ('\u{1c85}', '\u{442}'), ('\u{1c85}',
-        '\u{1c84}'), ('\u{1c86}', '\u{42a}'), ('\u{1c86}', '\u{44a}'),
-        ('\u{1c87}', '\u{462}'), ('\u{1c87}', '\u{463}'), ('\u{1c88}',
-        '\u{a64a}'), ('\u{1c88}', '\u{a64b}'), ('\u{1d79}', '\u{a77d}'),
-        ('\u{1d7d}', '\u{2c63}'), ('\u{1e00}', '\u{1e01}'), ('\u{1e01}',
-        '\u{1e00}'), ('\u{1e02}', '\u{1e03}'), ('\u{1e03}', '\u{1e02}'),
-        ('\u{1e04}', '\u{1e05}'), ('\u{1e05}', '\u{1e04}'), ('\u{1e06}',
-        '\u{1e07}'), ('\u{1e07}', '\u{1e06}'), ('\u{1e08}', '\u{1e09}'),
-        ('\u{1e09}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0b}'), ('\u{1e0b}',
-        '\u{1e0a}'), ('\u{1e0c}', '\u{1e0d}'), ('\u{1e0d}', '\u{1e0c}'),
-        ('\u{1e0e}', '\u{1e0f}'), ('\u{1e0f}', '\u{1e0e}'), ('\u{1e10}',
-        '\u{1e11}'), ('\u{1e11}', '\u{1e10}'), ('\u{1e12}', '\u{1e13}'),
-        ('\u{1e13}', '\u{1e12}'), ('\u{1e14}', '\u{1e15}'), ('\u{1e15}',
-        '\u{1e14}'), ('\u{1e16}', '\u{1e17}'), ('\u{1e17}', '\u{1e16}'),
-        ('\u{1e18}', '\u{1e19}'), ('\u{1e19}', '\u{1e18}'), ('\u{1e1a}',
-        '\u{1e1b}'), ('\u{1e1b}', '\u{1e1a}'), ('\u{1e1c}', '\u{1e1d}'),
-        ('\u{1e1d}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1f}'), ('\u{1e1f}',
-        '\u{1e1e}'), ('\u{1e20}', '\u{1e21}'), ('\u{1e21}', '\u{1e20}'),
-        ('\u{1e22}', '\u{1e23}'), ('\u{1e23}', '\u{1e22}'), ('\u{1e24}',
-        '\u{1e25}'), ('\u{1e25}', '\u{1e24}'), ('\u{1e26}', '\u{1e27}'),
-        ('\u{1e27}', '\u{1e26}'), ('\u{1e28}', '\u{1e29}'), ('\u{1e29}',
-        '\u{1e28}'), ('\u{1e2a}', '\u{1e2b}'), ('\u{1e2b}', '\u{1e2a}'),
-        ('\u{1e2c}', '\u{1e2d}'), ('\u{1e2d}', '\u{1e2c}'), ('\u{1e2e}',
-        '\u{1e2f}'), ('\u{1e2f}', '\u{1e2e}'), ('\u{1e30}', '\u{1e31}'),
-        ('\u{1e31}', '\u{1e30}'), ('\u{1e32}', '\u{1e33}'), ('\u{1e33}',
-        '\u{1e32}'), ('\u{1e34}', '\u{1e35}'), ('\u{1e35}', '\u{1e34}'),
-        ('\u{1e36}', '\u{1e37}'), ('\u{1e37}', '\u{1e36}'), ('\u{1e38}',
-        '\u{1e39}'), ('\u{1e39}', '\u{1e38}'), ('\u{1e3a}', '\u{1e3b}'),
-        ('\u{1e3b}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3d}'), ('\u{1e3d}',
-        '\u{1e3c}'), ('\u{1e3e}', '\u{1e3f}'), ('\u{1e3f}', '\u{1e3e}'),
-        ('\u{1e40}', '\u{1e41}'), ('\u{1e41}', '\u{1e40}'), ('\u{1e42}',
-        '\u{1e43}'), ('\u{1e43}', '\u{1e42}'), ('\u{1e44}', '\u{1e45}'),
-        ('\u{1e45}', '\u{1e44}'), ('\u{1e46}', '\u{1e47}'), ('\u{1e47}',
-        '\u{1e46}'), ('\u{1e48}', '\u{1e49}'), ('\u{1e49}', '\u{1e48}'),
-        ('\u{1e4a}', '\u{1e4b}'), ('\u{1e4b}', '\u{1e4a}'), ('\u{1e4c}',
-        '\u{1e4d}'), ('\u{1e4d}', '\u{1e4c}'), ('\u{1e4e}', '\u{1e4f}'),
-        ('\u{1e4f}', '\u{1e4e}'), ('\u{1e50}', '\u{1e51}'), ('\u{1e51}',
-        '\u{1e50}'), ('\u{1e52}', '\u{1e53}'), ('\u{1e53}', '\u{1e52}'),
-        ('\u{1e54}', '\u{1e55}'), ('\u{1e55}', '\u{1e54}'), ('\u{1e56}',
-        '\u{1e57}'), ('\u{1e57}', '\u{1e56}'), ('\u{1e58}', '\u{1e59}'),
-        ('\u{1e59}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5b}'), ('\u{1e5b}',
-        '\u{1e5a}'), ('\u{1e5c}', '\u{1e5d}'), ('\u{1e5d}', '\u{1e5c}'),
-        ('\u{1e5e}', '\u{1e5f}'), ('\u{1e5f}', '\u{1e5e}'), ('\u{1e60}',
-        '\u{1e61}'), ('\u{1e60}', '\u{1e9b}'), ('\u{1e61}', '\u{1e60}'),
-        ('\u{1e61}', '\u{1e9b}'), ('\u{1e62}', '\u{1e63}'), ('\u{1e63}',
-        '\u{1e62}'), ('\u{1e64}', '\u{1e65}'), ('\u{1e65}', '\u{1e64}'),
-        ('\u{1e66}', '\u{1e67}'), ('\u{1e67}', '\u{1e66}'), ('\u{1e68}',
-        '\u{1e69}'), ('\u{1e69}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6b}'),
-        ('\u{1e6b}', '\u{1e6a}'), ('\u{1e6c}', '\u{1e6d}'), ('\u{1e6d}',
-        '\u{1e6c}'), ('\u{1e6e}', '\u{1e6f}'), ('\u{1e6f}', '\u{1e6e}'),
-        ('\u{1e70}', '\u{1e71}'), ('\u{1e71}', '\u{1e70}'), ('\u{1e72}',
-        '\u{1e73}'), ('\u{1e73}', '\u{1e72}'), ('\u{1e74}', '\u{1e75}'),
-        ('\u{1e75}', '\u{1e74}'), ('\u{1e76}', '\u{1e77}'), ('\u{1e77}',
-        '\u{1e76}'), ('\u{1e78}', '\u{1e79}'), ('\u{1e79}', '\u{1e78}'),
-        ('\u{1e7a}', '\u{1e7b}'), ('\u{1e7b}', '\u{1e7a}'), ('\u{1e7c}',
-        '\u{1e7d}'), ('\u{1e7d}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7f}'),
-        ('\u{1e7f}', '\u{1e7e}'), ('\u{1e80}', '\u{1e81}'), ('\u{1e81}',
-        '\u{1e80}'), ('\u{1e82}', '\u{1e83}'), ('\u{1e83}', '\u{1e82}'),
-        ('\u{1e84}', '\u{1e85}'), ('\u{1e85}', '\u{1e84}'), ('\u{1e86}',
-        '\u{1e87}'), ('\u{1e87}', '\u{1e86}'), ('\u{1e88}', '\u{1e89}'),
-        ('\u{1e89}', '\u{1e88}'), ('\u{1e8a}', '\u{1e8b}'), ('\u{1e8b}',
-        '\u{1e8a}'), ('\u{1e8c}', '\u{1e8d}'), ('\u{1e8d}', '\u{1e8c}'),
-        ('\u{1e8e}', '\u{1e8f}'), ('\u{1e8f}', '\u{1e8e}'), ('\u{1e90}',
-        '\u{1e91}'), ('\u{1e91}', '\u{1e90}'), ('\u{1e92}', '\u{1e93}'),
-        ('\u{1e93}', '\u{1e92}'), ('\u{1e94}', '\u{1e95}'), ('\u{1e95}',
-        '\u{1e94}'), ('\u{1e9b}', '\u{1e60}'), ('\u{1e9b}', '\u{1e61}'),
-        ('\u{1e9e}', '\u{df}'), ('\u{1ea0}', '\u{1ea1}'), ('\u{1ea1}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea3}'), ('\u{1ea3}', '\u{1ea2}'),
-        ('\u{1ea4}', '\u{1ea5}'), ('\u{1ea5}', '\u{1ea4}'), ('\u{1ea6}',
-        '\u{1ea7}'), ('\u{1ea7}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea9}'),
-        ('\u{1ea9}', '\u{1ea8}'), ('\u{1eaa}', '\u{1eab}'), ('\u{1eab}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1ead}'), ('\u{1ead}', '\u{1eac}'),
-        ('\u{1eae}', '\u{1eaf}'), ('\u{1eaf}', '\u{1eae}'), ('\u{1eb0}',
-        '\u{1eb1}'), ('\u{1eb1}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb3}'),
-        ('\u{1eb3}', '\u{1eb2}'), ('\u{1eb4}', '\u{1eb5}'), ('\u{1eb5}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb7}'), ('\u{1eb7}', '\u{1eb6}'),
-        ('\u{1eb8}', '\u{1eb9}'), ('\u{1eb9}', '\u{1eb8}'), ('\u{1eba}',
-        '\u{1ebb}'), ('\u{1ebb}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebd}'),
-        ('\u{1ebd}', '\u{1ebc}'), ('\u{1ebe}', '\u{1ebf}'), ('\u{1ebf}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec1}'), ('\u{1ec1}', '\u{1ec0}'),
-        ('\u{1ec2}', '\u{1ec3}'), ('\u{1ec3}', '\u{1ec2}'), ('\u{1ec4}',
-        '\u{1ec5}'), ('\u{1ec5}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec7}'),
-        ('\u{1ec7}', '\u{1ec6}'), ('\u{1ec8}', '\u{1ec9}'), ('\u{1ec9}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1ecb}'), ('\u{1ecb}', '\u{1eca}'),
-        ('\u{1ecc}', '\u{1ecd}'), ('\u{1ecd}', '\u{1ecc}'), ('\u{1ece}',
-        '\u{1ecf}'), ('\u{1ecf}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed1}'),
-        ('\u{1ed1}', '\u{1ed0}'), ('\u{1ed2}', '\u{1ed3}'), ('\u{1ed3}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed5}'), ('\u{1ed5}', '\u{1ed4}'),
-        ('\u{1ed6}', '\u{1ed7}'), ('\u{1ed7}', '\u{1ed6}'), ('\u{1ed8}',
-        '\u{1ed9}'), ('\u{1ed9}', '\u{1ed8}'), ('\u{1eda}', '\u{1edb}'),
-        ('\u{1edb}', '\u{1eda}'), ('\u{1edc}', '\u{1edd}'), ('\u{1edd}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1edf}'), ('\u{1edf}', '\u{1ede}'),
-        ('\u{1ee0}', '\u{1ee1}'), ('\u{1ee1}', '\u{1ee0}'), ('\u{1ee2}',
-        '\u{1ee3}'), ('\u{1ee3}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee5}'),
-        ('\u{1ee5}', '\u{1ee4}'), ('\u{1ee6}', '\u{1ee7}'), ('\u{1ee7}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee9}'), ('\u{1ee9}', '\u{1ee8}'),
-        ('\u{1eea}', '\u{1eeb}'), ('\u{1eeb}', '\u{1eea}'), ('\u{1eec}',
-        '\u{1eed}'), ('\u{1eed}', '\u{1eec}'), ('\u{1eee}', '\u{1eef}'),
-        ('\u{1eef}', '\u{1eee}'), ('\u{1ef0}', '\u{1ef1}'), ('\u{1ef1}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef3}'), ('\u{1ef3}', '\u{1ef2}'),
-        ('\u{1ef4}', '\u{1ef5}'), ('\u{1ef5}', '\u{1ef4}'), ('\u{1ef6}',
-        '\u{1ef7}'), ('\u{1ef7}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef9}'),
-        ('\u{1ef9}', '\u{1ef8}'), ('\u{1efa}', '\u{1efb}'), ('\u{1efb}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efd}'), ('\u{1efd}', '\u{1efc}'),
-        ('\u{1efe}', '\u{1eff}'), ('\u{1eff}', '\u{1efe}'), ('\u{1f00}',
-        '\u{1f08}'), ('\u{1f01}', '\u{1f09}'), ('\u{1f02}', '\u{1f0a}'),
-        ('\u{1f03}', '\u{1f0b}'), ('\u{1f04}', '\u{1f0c}'), ('\u{1f05}',
-        '\u{1f0d}'), ('\u{1f06}', '\u{1f0e}'), ('\u{1f07}', '\u{1f0f}'),
-        ('\u{1f08}', '\u{1f00}'), ('\u{1f09}', '\u{1f01}'), ('\u{1f0a}',
-        '\u{1f02}'), ('\u{1f0b}', '\u{1f03}'), ('\u{1f0c}', '\u{1f04}'),
-        ('\u{1f0d}', '\u{1f05}'), ('\u{1f0e}', '\u{1f06}'), ('\u{1f0f}',
-        '\u{1f07}'), ('\u{1f10}', '\u{1f18}'), ('\u{1f11}', '\u{1f19}'),
-        ('\u{1f12}', '\u{1f1a}'), ('\u{1f13}', '\u{1f1b}'), ('\u{1f14}',
-        '\u{1f1c}'), ('\u{1f15}', '\u{1f1d}'), ('\u{1f18}', '\u{1f10}'),
-        ('\u{1f19}', '\u{1f11}'), ('\u{1f1a}', '\u{1f12}'), ('\u{1f1b}',
-        '\u{1f13}'), ('\u{1f1c}', '\u{1f14}'), ('\u{1f1d}', '\u{1f15}'),
-        ('\u{1f20}', '\u{1f28}'), ('\u{1f21}', '\u{1f29}'), ('\u{1f22}',
-        '\u{1f2a}'), ('\u{1f23}', '\u{1f2b}'), ('\u{1f24}', '\u{1f2c}'),
-        ('\u{1f25}', '\u{1f2d}'), ('\u{1f26}', '\u{1f2e}'), ('\u{1f27}',
-        '\u{1f2f}'), ('\u{1f28}', '\u{1f20}'), ('\u{1f29}', '\u{1f21}'),
-        ('\u{1f2a}', '\u{1f22}'), ('\u{1f2b}', '\u{1f23}'), ('\u{1f2c}',
-        '\u{1f24}'), ('\u{1f2d}', '\u{1f25}'), ('\u{1f2e}', '\u{1f26}'),
-        ('\u{1f2f}', '\u{1f27}'), ('\u{1f30}', '\u{1f38}'), ('\u{1f31}',
-        '\u{1f39}'), ('\u{1f32}', '\u{1f3a}'), ('\u{1f33}', '\u{1f3b}'),
-        ('\u{1f34}', '\u{1f3c}'), ('\u{1f35}', '\u{1f3d}'), ('\u{1f36}',
-        '\u{1f3e}'), ('\u{1f37}', '\u{1f3f}'), ('\u{1f38}', '\u{1f30}'),
-        ('\u{1f39}', '\u{1f31}'), ('\u{1f3a}', '\u{1f32}'), ('\u{1f3b}',
-        '\u{1f33}'), ('\u{1f3c}', '\u{1f34}'), ('\u{1f3d}', '\u{1f35}'),
-        ('\u{1f3e}', '\u{1f36}'), ('\u{1f3f}', '\u{1f37}'), ('\u{1f40}',
-        '\u{1f48}'), ('\u{1f41}', '\u{1f49}'), ('\u{1f42}', '\u{1f4a}'),
-        ('\u{1f43}', '\u{1f4b}'), ('\u{1f44}', '\u{1f4c}'), ('\u{1f45}',
-        '\u{1f4d}'), ('\u{1f48}', '\u{1f40}'), ('\u{1f49}', '\u{1f41}'),
-        ('\u{1f4a}', '\u{1f42}'), ('\u{1f4b}', '\u{1f43}'), ('\u{1f4c}',
-        '\u{1f44}'), ('\u{1f4d}', '\u{1f45}'), ('\u{1f51}', '\u{1f59}'),
-        ('\u{1f53}', '\u{1f5b}'), ('\u{1f55}', '\u{1f5d}'), ('\u{1f57}',
-        '\u{1f5f}'), ('\u{1f59}', '\u{1f51}'), ('\u{1f5b}', '\u{1f53}'),
-        ('\u{1f5d}', '\u{1f55}'), ('\u{1f5f}', '\u{1f57}'), ('\u{1f60}',
-        '\u{1f68}'), ('\u{1f61}', '\u{1f69}'), ('\u{1f62}', '\u{1f6a}'),
-        ('\u{1f63}', '\u{1f6b}'), ('\u{1f64}', '\u{1f6c}'), ('\u{1f65}',
-        '\u{1f6d}'), ('\u{1f66}', '\u{1f6e}'), ('\u{1f67}', '\u{1f6f}'),
-        ('\u{1f68}', '\u{1f60}'), ('\u{1f69}', '\u{1f61}'), ('\u{1f6a}',
-        '\u{1f62}'), ('\u{1f6b}', '\u{1f63}'), ('\u{1f6c}', '\u{1f64}'),
-        ('\u{1f6d}', '\u{1f65}'), ('\u{1f6e}', '\u{1f66}'), ('\u{1f6f}',
-        '\u{1f67}'), ('\u{1f70}', '\u{1fba}'), ('\u{1f71}', '\u{1fbb}'),
-        ('\u{1f72}', '\u{1fc8}'), ('\u{1f73}', '\u{1fc9}'), ('\u{1f74}',
-        '\u{1fca}'), ('\u{1f75}', '\u{1fcb}'), ('\u{1f76}', '\u{1fda}'),
-        ('\u{1f77}', '\u{1fdb}'), ('\u{1f78}', '\u{1ff8}'), ('\u{1f79}',
-        '\u{1ff9}'), ('\u{1f7a}', '\u{1fea}'), ('\u{1f7b}', '\u{1feb}'),
-        ('\u{1f7c}', '\u{1ffa}'), ('\u{1f7d}', '\u{1ffb}'), ('\u{1f80}',
-        '\u{1f88}'), ('\u{1f81}', '\u{1f89}'), ('\u{1f82}', '\u{1f8a}'),
-        ('\u{1f83}', '\u{1f8b}'), ('\u{1f84}', '\u{1f8c}'), ('\u{1f85}',
-        '\u{1f8d}'), ('\u{1f86}', '\u{1f8e}'), ('\u{1f87}', '\u{1f8f}'),
-        ('\u{1f88}', '\u{1f80}'), ('\u{1f89}', '\u{1f81}'), ('\u{1f8a}',
-        '\u{1f82}'), ('\u{1f8b}', '\u{1f83}'), ('\u{1f8c}', '\u{1f84}'),
-        ('\u{1f8d}', '\u{1f85}'), ('\u{1f8e}', '\u{1f86}'), ('\u{1f8f}',
-        '\u{1f87}'), ('\u{1f90}', '\u{1f98}'), ('\u{1f91}', '\u{1f99}'),
-        ('\u{1f92}', '\u{1f9a}'), ('\u{1f93}', '\u{1f9b}'), ('\u{1f94}',
-        '\u{1f9c}'), ('\u{1f95}', '\u{1f9d}'), ('\u{1f96}', '\u{1f9e}'),
-        ('\u{1f97}', '\u{1f9f}'), ('\u{1f98}', '\u{1f90}'), ('\u{1f99}',
-        '\u{1f91}'), ('\u{1f9a}', '\u{1f92}'), ('\u{1f9b}', '\u{1f93}'),
-        ('\u{1f9c}', '\u{1f94}'), ('\u{1f9d}', '\u{1f95}'), ('\u{1f9e}',
-        '\u{1f96}'), ('\u{1f9f}', '\u{1f97}'), ('\u{1fa0}', '\u{1fa8}'),
-        ('\u{1fa1}', '\u{1fa9}'), ('\u{1fa2}', '\u{1faa}'), ('\u{1fa3}',
-        '\u{1fab}'), ('\u{1fa4}', '\u{1fac}'), ('\u{1fa5}', '\u{1fad}'),
-        ('\u{1fa6}', '\u{1fae}'), ('\u{1fa7}', '\u{1faf}'), ('\u{1fa8}',
-        '\u{1fa0}'), ('\u{1fa9}', '\u{1fa1}'), ('\u{1faa}', '\u{1fa2}'),
-        ('\u{1fab}', '\u{1fa3}'), ('\u{1fac}', '\u{1fa4}'), ('\u{1fad}',
-        '\u{1fa5}'), ('\u{1fae}', '\u{1fa6}'), ('\u{1faf}', '\u{1fa7}'),
-        ('\u{1fb0}', '\u{1fb8}'), ('\u{1fb1}', '\u{1fb9}'), ('\u{1fb3}',
-        '\u{1fbc}'), ('\u{1fb8}', '\u{1fb0}'), ('\u{1fb9}', '\u{1fb1}'),
-        ('\u{1fba}', '\u{1f70}'), ('\u{1fbb}', '\u{1f71}'), ('\u{1fbc}',
-        '\u{1fb3}'), ('\u{1fbe}', '\u{345}'), ('\u{1fbe}', '\u{399}'),
-        ('\u{1fbe}', '\u{3b9}'), ('\u{1fc3}', '\u{1fcc}'), ('\u{1fc8}',
-        '\u{1f72}'), ('\u{1fc9}', '\u{1f73}'), ('\u{1fca}', '\u{1f74}'),
-        ('\u{1fcb}', '\u{1f75}'), ('\u{1fcc}', '\u{1fc3}'), ('\u{1fd0}',
-        '\u{1fd8}'), ('\u{1fd1}', '\u{1fd9}'), ('\u{1fd8}', '\u{1fd0}'),
-        ('\u{1fd9}', '\u{1fd1}'), ('\u{1fda}', '\u{1f76}'), ('\u{1fdb}',
-        '\u{1f77}'), ('\u{1fe0}', '\u{1fe8}'), ('\u{1fe1}', '\u{1fe9}'),
-        ('\u{1fe5}', '\u{1fec}'), ('\u{1fe8}', '\u{1fe0}'), ('\u{1fe9}',
-        '\u{1fe1}'), ('\u{1fea}', '\u{1f7a}'), ('\u{1feb}', '\u{1f7b}'),
-        ('\u{1fec}', '\u{1fe5}'), ('\u{1ff3}', '\u{1ffc}'), ('\u{1ff8}',
-        '\u{1f78}'), ('\u{1ff9}', '\u{1f79}'), ('\u{1ffa}', '\u{1f7c}'),
-        ('\u{1ffb}', '\u{1f7d}'), ('\u{1ffc}', '\u{1ff3}'), ('\u{2126}',
-        '\u{3a9}'), ('\u{2126}', '\u{3c9}'), ('\u{212a}', '\u{4b}'),
-        ('\u{212a}', '\u{6b}'), ('\u{212b}', '\u{c5}'), ('\u{212b}', '\u{e5}'),
-        ('\u{2132}', '\u{214e}'), ('\u{214e}', '\u{2132}'), ('\u{2160}',
-        '\u{2170}'), ('\u{2161}', '\u{2171}'), ('\u{2162}', '\u{2172}'),
-        ('\u{2163}', '\u{2173}'), ('\u{2164}', '\u{2174}'), ('\u{2165}',
-        '\u{2175}'), ('\u{2166}', '\u{2176}'), ('\u{2167}', '\u{2177}'),
-        ('\u{2168}', '\u{2178}'), ('\u{2169}', '\u{2179}'), ('\u{216a}',
-        '\u{217a}'), ('\u{216b}', '\u{217b}'), ('\u{216c}', '\u{217c}'),
-        ('\u{216d}', '\u{217d}'), ('\u{216e}', '\u{217e}'), ('\u{216f}',
-        '\u{217f}'), ('\u{2170}', '\u{2160}'), ('\u{2171}', '\u{2161}'),
-        ('\u{2172}', '\u{2162}'), ('\u{2173}', '\u{2163}'), ('\u{2174}',
-        '\u{2164}'), ('\u{2175}', '\u{2165}'), ('\u{2176}', '\u{2166}'),
-        ('\u{2177}', '\u{2167}'), ('\u{2178}', '\u{2168}'), ('\u{2179}',
-        '\u{2169}'), ('\u{217a}', '\u{216a}'), ('\u{217b}', '\u{216b}'),
-        ('\u{217c}', '\u{216c}'), ('\u{217d}', '\u{216d}'), ('\u{217e}',
-        '\u{216e}'), ('\u{217f}', '\u{216f}'), ('\u{2183}', '\u{2184}'),
-        ('\u{2184}', '\u{2183}'), ('\u{24b6}', '\u{24d0}'), ('\u{24b7}',
-        '\u{24d1}'), ('\u{24b8}', '\u{24d2}'), ('\u{24b9}', '\u{24d3}'),
-        ('\u{24ba}', '\u{24d4}'), ('\u{24bb}', '\u{24d5}'), ('\u{24bc}',
-        '\u{24d6}'), ('\u{24bd}', '\u{24d7}'), ('\u{24be}', '\u{24d8}'),
-        ('\u{24bf}', '\u{24d9}'), ('\u{24c0}', '\u{24da}'), ('\u{24c1}',
-        '\u{24db}'), ('\u{24c2}', '\u{24dc}'), ('\u{24c3}', '\u{24dd}'),
-        ('\u{24c4}', '\u{24de}'), ('\u{24c5}', '\u{24df}'), ('\u{24c6}',
-        '\u{24e0}'), ('\u{24c7}', '\u{24e1}'), ('\u{24c8}', '\u{24e2}'),
-        ('\u{24c9}', '\u{24e3}'), ('\u{24ca}', '\u{24e4}'), ('\u{24cb}',
-        '\u{24e5}'), ('\u{24cc}', '\u{24e6}'), ('\u{24cd}', '\u{24e7}'),
-        ('\u{24ce}', '\u{24e8}'), ('\u{24cf}', '\u{24e9}'), ('\u{24d0}',
-        '\u{24b6}'), ('\u{24d1}', '\u{24b7}'), ('\u{24d2}', '\u{24b8}'),
-        ('\u{24d3}', '\u{24b9}'), ('\u{24d4}', '\u{24ba}'), ('\u{24d5}',
-        '\u{24bb}'), ('\u{24d6}', '\u{24bc}'), ('\u{24d7}', '\u{24bd}'),
-        ('\u{24d8}', '\u{24be}'), ('\u{24d9}', '\u{24bf}'), ('\u{24da}',
-        '\u{24c0}'), ('\u{24db}', '\u{24c1}'), ('\u{24dc}', '\u{24c2}'),
-        ('\u{24dd}', '\u{24c3}'), ('\u{24de}', '\u{24c4}'), ('\u{24df}',
-        '\u{24c5}'), ('\u{24e0}', '\u{24c6}'), ('\u{24e1}', '\u{24c7}'),
-        ('\u{24e2}', '\u{24c8}'), ('\u{24e3}', '\u{24c9}'), ('\u{24e4}',
-        '\u{24ca}'), ('\u{24e5}', '\u{24cb}'), ('\u{24e6}', '\u{24cc}'),
-        ('\u{24e7}', '\u{24cd}'), ('\u{24e8}', '\u{24ce}'), ('\u{24e9}',
-        '\u{24cf}'), ('\u{2c00}', '\u{2c30}'), ('\u{2c01}', '\u{2c31}'),
-        ('\u{2c02}', '\u{2c32}'), ('\u{2c03}', '\u{2c33}'), ('\u{2c04}',
-        '\u{2c34}'), ('\u{2c05}', '\u{2c35}'), ('\u{2c06}', '\u{2c36}'),
-        ('\u{2c07}', '\u{2c37}'), ('\u{2c08}', '\u{2c38}'), ('\u{2c09}',
-        '\u{2c39}'), ('\u{2c0a}', '\u{2c3a}'), ('\u{2c0b}', '\u{2c3b}'),
-        ('\u{2c0c}', '\u{2c3c}'), ('\u{2c0d}', '\u{2c3d}'), ('\u{2c0e}',
-        '\u{2c3e}'), ('\u{2c0f}', '\u{2c3f}'), ('\u{2c10}', '\u{2c40}'),
-        ('\u{2c11}', '\u{2c41}'), ('\u{2c12}', '\u{2c42}'), ('\u{2c13}',
-        '\u{2c43}'), ('\u{2c14}', '\u{2c44}'), ('\u{2c15}', '\u{2c45}'),
-        ('\u{2c16}', '\u{2c46}'), ('\u{2c17}', '\u{2c47}'), ('\u{2c18}',
-        '\u{2c48}'), ('\u{2c19}', '\u{2c49}'), ('\u{2c1a}', '\u{2c4a}'),
-        ('\u{2c1b}', '\u{2c4b}'), ('\u{2c1c}', '\u{2c4c}'), ('\u{2c1d}',
-        '\u{2c4d}'), ('\u{2c1e}', '\u{2c4e}'), ('\u{2c1f}', '\u{2c4f}'),
-        ('\u{2c20}', '\u{2c50}'), ('\u{2c21}', '\u{2c51}'), ('\u{2c22}',
-        '\u{2c52}'), ('\u{2c23}', '\u{2c53}'), ('\u{2c24}', '\u{2c54}'),
-        ('\u{2c25}', '\u{2c55}'), ('\u{2c26}', '\u{2c56}'), ('\u{2c27}',
-        '\u{2c57}'), ('\u{2c28}', '\u{2c58}'), ('\u{2c29}', '\u{2c59}'),
-        ('\u{2c2a}', '\u{2c5a}'), ('\u{2c2b}', '\u{2c5b}'), ('\u{2c2c}',
-        '\u{2c5c}'), ('\u{2c2d}', '\u{2c5d}'), ('\u{2c2e}', '\u{2c5e}'),
-        ('\u{2c30}', '\u{2c00}'), ('\u{2c31}', '\u{2c01}'), ('\u{2c32}',
-        '\u{2c02}'), ('\u{2c33}', '\u{2c03}'), ('\u{2c34}', '\u{2c04}'),
-        ('\u{2c35}', '\u{2c05}'), ('\u{2c36}', '\u{2c06}'), ('\u{2c37}',
-        '\u{2c07}'), ('\u{2c38}', '\u{2c08}'), ('\u{2c39}', '\u{2c09}'),
-        ('\u{2c3a}', '\u{2c0a}'), ('\u{2c3b}', '\u{2c0b}'), ('\u{2c3c}',
-        '\u{2c0c}'), ('\u{2c3d}', '\u{2c0d}'), ('\u{2c3e}', '\u{2c0e}'),
-        ('\u{2c3f}', '\u{2c0f}'), ('\u{2c40}', '\u{2c10}'), ('\u{2c41}',
-        '\u{2c11}'), ('\u{2c42}', '\u{2c12}'), ('\u{2c43}', '\u{2c13}'),
-        ('\u{2c44}', '\u{2c14}'), ('\u{2c45}', '\u{2c15}'), ('\u{2c46}',
-        '\u{2c16}'), ('\u{2c47}', '\u{2c17}'), ('\u{2c48}', '\u{2c18}'),
-        ('\u{2c49}', '\u{2c19}'), ('\u{2c4a}', '\u{2c1a}'), ('\u{2c4b}',
-        '\u{2c1b}'), ('\u{2c4c}', '\u{2c1c}'), ('\u{2c4d}', '\u{2c1d}'),
-        ('\u{2c4e}', '\u{2c1e}'), ('\u{2c4f}', '\u{2c1f}'), ('\u{2c50}',
-        '\u{2c20}'), ('\u{2c51}', '\u{2c21}'), ('\u{2c52}', '\u{2c22}'),
-        ('\u{2c53}', '\u{2c23}'), ('\u{2c54}', '\u{2c24}'), ('\u{2c55}',
-        '\u{2c25}'), ('\u{2c56}', '\u{2c26}'), ('\u{2c57}', '\u{2c27}'),
-        ('\u{2c58}', '\u{2c28}'), ('\u{2c59}', '\u{2c29}'), ('\u{2c5a}',
-        '\u{2c2a}'), ('\u{2c5b}', '\u{2c2b}'), ('\u{2c5c}', '\u{2c2c}'),
-        ('\u{2c5d}', '\u{2c2d}'), ('\u{2c5e}', '\u{2c2e}'), ('\u{2c60}',
-        '\u{2c61}'), ('\u{2c61}', '\u{2c60}'), ('\u{2c62}', '\u{26b}'),
-        ('\u{2c63}', '\u{1d7d}'), ('\u{2c64}', '\u{27d}'), ('\u{2c65}',
-        '\u{23a}'), ('\u{2c66}', '\u{23e}'), ('\u{2c67}', '\u{2c68}'),
-        ('\u{2c68}', '\u{2c67}'), ('\u{2c69}', '\u{2c6a}'), ('\u{2c6a}',
-        '\u{2c69}'), ('\u{2c6b}', '\u{2c6c}'), ('\u{2c6c}', '\u{2c6b}'),
-        ('\u{2c6d}', '\u{251}'), ('\u{2c6e}', '\u{271}'), ('\u{2c6f}',
-        '\u{250}'), ('\u{2c70}', '\u{252}'), ('\u{2c72}', '\u{2c73}'),
-        ('\u{2c73}', '\u{2c72}'), ('\u{2c75}', '\u{2c76}'), ('\u{2c76}',
-        '\u{2c75}'), ('\u{2c7e}', '\u{23f}'), ('\u{2c7f}', '\u{240}'),
-        ('\u{2c80}', '\u{2c81}'), ('\u{2c81}', '\u{2c80}'), ('\u{2c82}',
-        '\u{2c83}'), ('\u{2c83}', '\u{2c82}'), ('\u{2c84}', '\u{2c85}'),
-        ('\u{2c85}', '\u{2c84}'), ('\u{2c86}', '\u{2c87}'), ('\u{2c87}',
-        '\u{2c86}'), ('\u{2c88}', '\u{2c89}'), ('\u{2c89}', '\u{2c88}'),
-        ('\u{2c8a}', '\u{2c8b}'), ('\u{2c8b}', '\u{2c8a}'), ('\u{2c8c}',
-        '\u{2c8d}'), ('\u{2c8d}', '\u{2c8c}'), ('\u{2c8e}', '\u{2c8f}'),
-        ('\u{2c8f}', '\u{2c8e}'), ('\u{2c90}', '\u{2c91}'), ('\u{2c91}',
-        '\u{2c90}'), ('\u{2c92}', '\u{2c93}'), ('\u{2c93}', '\u{2c92}'),
-        ('\u{2c94}', '\u{2c95}'), ('\u{2c95}', '\u{2c94}'), ('\u{2c96}',
-        '\u{2c97}'), ('\u{2c97}', '\u{2c96}'), ('\u{2c98}', '\u{2c99}'),
-        ('\u{2c99}', '\u{2c98}'), ('\u{2c9a}', '\u{2c9b}'), ('\u{2c9b}',
-        '\u{2c9a}'), ('\u{2c9c}', '\u{2c9d}'), ('\u{2c9d}', '\u{2c9c}'),
-        ('\u{2c9e}', '\u{2c9f}'), ('\u{2c9f}', '\u{2c9e}'), ('\u{2ca0}',
-        '\u{2ca1}'), ('\u{2ca1}', '\u{2ca0}'), ('\u{2ca2}', '\u{2ca3}'),
-        ('\u{2ca3}', '\u{2ca2}'), ('\u{2ca4}', '\u{2ca5}'), ('\u{2ca5}',
-        '\u{2ca4}'), ('\u{2ca6}', '\u{2ca7}'), ('\u{2ca7}', '\u{2ca6}'),
-        ('\u{2ca8}', '\u{2ca9}'), ('\u{2ca9}', '\u{2ca8}'), ('\u{2caa}',
-        '\u{2cab}'), ('\u{2cab}', '\u{2caa}'), ('\u{2cac}', '\u{2cad}'),
-        ('\u{2cad}', '\u{2cac}'), ('\u{2cae}', '\u{2caf}'), ('\u{2caf}',
-        '\u{2cae}'), ('\u{2cb0}', '\u{2cb1}'), ('\u{2cb1}', '\u{2cb0}'),
-        ('\u{2cb2}', '\u{2cb3}'), ('\u{2cb3}', '\u{2cb2}'), ('\u{2cb4}',
-        '\u{2cb5}'), ('\u{2cb5}', '\u{2cb4}'), ('\u{2cb6}', '\u{2cb7}'),
-        ('\u{2cb7}', '\u{2cb6}'), ('\u{2cb8}', '\u{2cb9}'), ('\u{2cb9}',
-        '\u{2cb8}'), ('\u{2cba}', '\u{2cbb}'), ('\u{2cbb}', '\u{2cba}'),
-        ('\u{2cbc}', '\u{2cbd}'), ('\u{2cbd}', '\u{2cbc}'), ('\u{2cbe}',
-        '\u{2cbf}'), ('\u{2cbf}', '\u{2cbe}'), ('\u{2cc0}', '\u{2cc1}'),
-        ('\u{2cc1}', '\u{2cc0}'), ('\u{2cc2}', '\u{2cc3}'), ('\u{2cc3}',
-        '\u{2cc2}'), ('\u{2cc4}', '\u{2cc5}'), ('\u{2cc5}', '\u{2cc4}'),
-        ('\u{2cc6}', '\u{2cc7}'), ('\u{2cc7}', '\u{2cc6}'), ('\u{2cc8}',
-        '\u{2cc9}'), ('\u{2cc9}', '\u{2cc8}'), ('\u{2cca}', '\u{2ccb}'),
-        ('\u{2ccb}', '\u{2cca}'), ('\u{2ccc}', '\u{2ccd}'), ('\u{2ccd}',
-        '\u{2ccc}'), ('\u{2cce}', '\u{2ccf}'), ('\u{2ccf}', '\u{2cce}'),
-        ('\u{2cd0}', '\u{2cd1}'), ('\u{2cd1}', '\u{2cd0}'), ('\u{2cd2}',
-        '\u{2cd3}'), ('\u{2cd3}', '\u{2cd2}'), ('\u{2cd4}', '\u{2cd5}'),
-        ('\u{2cd5}', '\u{2cd4}'), ('\u{2cd6}', '\u{2cd7}'), ('\u{2cd7}',
-        '\u{2cd6}'), ('\u{2cd8}', '\u{2cd9}'), ('\u{2cd9}', '\u{2cd8}'),
-        ('\u{2cda}', '\u{2cdb}'), ('\u{2cdb}', '\u{2cda}'), ('\u{2cdc}',
-        '\u{2cdd}'), ('\u{2cdd}', '\u{2cdc}'), ('\u{2cde}', '\u{2cdf}'),
-        ('\u{2cdf}', '\u{2cde}'), ('\u{2ce0}', '\u{2ce1}'), ('\u{2ce1}',
-        '\u{2ce0}'), ('\u{2ce2}', '\u{2ce3}'), ('\u{2ce3}', '\u{2ce2}'),
-        ('\u{2ceb}', '\u{2cec}'), ('\u{2cec}', '\u{2ceb}'), ('\u{2ced}',
-        '\u{2cee}'), ('\u{2cee}', '\u{2ced}'), ('\u{2cf2}', '\u{2cf3}'),
-        ('\u{2cf3}', '\u{2cf2}'), ('\u{2d00}', '\u{10a0}'), ('\u{2d01}',
-        '\u{10a1}'), ('\u{2d02}', '\u{10a2}'), ('\u{2d03}', '\u{10a3}'),
-        ('\u{2d04}', '\u{10a4}'), ('\u{2d05}', '\u{10a5}'), ('\u{2d06}',
-        '\u{10a6}'), ('\u{2d07}', '\u{10a7}'), ('\u{2d08}', '\u{10a8}'),
-        ('\u{2d09}', '\u{10a9}'), ('\u{2d0a}', '\u{10aa}'), ('\u{2d0b}',
-        '\u{10ab}'), ('\u{2d0c}', '\u{10ac}'), ('\u{2d0d}', '\u{10ad}'),
-        ('\u{2d0e}', '\u{10ae}'), ('\u{2d0f}', '\u{10af}'), ('\u{2d10}',
-        '\u{10b0}'), ('\u{2d11}', '\u{10b1}'), ('\u{2d12}', '\u{10b2}'),
-        ('\u{2d13}', '\u{10b3}'), ('\u{2d14}', '\u{10b4}'), ('\u{2d15}',
-        '\u{10b5}'), ('\u{2d16}', '\u{10b6}'), ('\u{2d17}', '\u{10b7}'),
-        ('\u{2d18}', '\u{10b8}'), ('\u{2d19}', '\u{10b9}'), ('\u{2d1a}',
-        '\u{10ba}'), ('\u{2d1b}', '\u{10bb}'), ('\u{2d1c}', '\u{10bc}'),
-        ('\u{2d1d}', '\u{10bd}'), ('\u{2d1e}', '\u{10be}'), ('\u{2d1f}',
-        '\u{10bf}'), ('\u{2d20}', '\u{10c0}'), ('\u{2d21}', '\u{10c1}'),
-        ('\u{2d22}', '\u{10c2}'), ('\u{2d23}', '\u{10c3}'), ('\u{2d24}',
-        '\u{10c4}'), ('\u{2d25}', '\u{10c5}'), ('\u{2d27}', '\u{10c7}'),
-        ('\u{2d2d}', '\u{10cd}'), ('\u{a640}', '\u{a641}'), ('\u{a641}',
-        '\u{a640}'), ('\u{a642}', '\u{a643}'), ('\u{a643}', '\u{a642}'),
-        ('\u{a644}', '\u{a645}'), ('\u{a645}', '\u{a644}'), ('\u{a646}',
-        '\u{a647}'), ('\u{a647}', '\u{a646}'), ('\u{a648}', '\u{a649}'),
-        ('\u{a649}', '\u{a648}'), ('\u{a64a}', '\u{1c88}'), ('\u{a64a}',
-        '\u{a64b}'), ('\u{a64b}', '\u{1c88}'), ('\u{a64b}', '\u{a64a}'),
-        ('\u{a64c}', '\u{a64d}'), ('\u{a64d}', '\u{a64c}'), ('\u{a64e}',
-        '\u{a64f}'), ('\u{a64f}', '\u{a64e}'), ('\u{a650}', '\u{a651}'),
-        ('\u{a651}', '\u{a650}'), ('\u{a652}', '\u{a653}'), ('\u{a653}',
-        '\u{a652}'), ('\u{a654}', '\u{a655}'), ('\u{a655}', '\u{a654}'),
-        ('\u{a656}', '\u{a657}'), ('\u{a657}', '\u{a656}'), ('\u{a658}',
-        '\u{a659}'), ('\u{a659}', '\u{a658}'), ('\u{a65a}', '\u{a65b}'),
-        ('\u{a65b}', '\u{a65a}'), ('\u{a65c}', '\u{a65d}'), ('\u{a65d}',
-        '\u{a65c}'), ('\u{a65e}', '\u{a65f}'), ('\u{a65f}', '\u{a65e}'),
-        ('\u{a660}', '\u{a661}'), ('\u{a661}', '\u{a660}'), ('\u{a662}',
-        '\u{a663}'), ('\u{a663}', '\u{a662}'), ('\u{a664}', '\u{a665}'),
-        ('\u{a665}', '\u{a664}'), ('\u{a666}', '\u{a667}'), ('\u{a667}',
-        '\u{a666}'), ('\u{a668}', '\u{a669}'), ('\u{a669}', '\u{a668}'),
-        ('\u{a66a}', '\u{a66b}'), ('\u{a66b}', '\u{a66a}'), ('\u{a66c}',
-        '\u{a66d}'), ('\u{a66d}', '\u{a66c}'), ('\u{a680}', '\u{a681}'),
-        ('\u{a681}', '\u{a680}'), ('\u{a682}', '\u{a683}'), ('\u{a683}',
-        '\u{a682}'), ('\u{a684}', '\u{a685}'), ('\u{a685}', '\u{a684}'),
-        ('\u{a686}', '\u{a687}'), ('\u{a687}', '\u{a686}'), ('\u{a688}',
-        '\u{a689}'), ('\u{a689}', '\u{a688}'), ('\u{a68a}', '\u{a68b}'),
-        ('\u{a68b}', '\u{a68a}'), ('\u{a68c}', '\u{a68d}'), ('\u{a68d}',
-        '\u{a68c}'), ('\u{a68e}', '\u{a68f}'), ('\u{a68f}', '\u{a68e}'),
-        ('\u{a690}', '\u{a691}'), ('\u{a691}', '\u{a690}'), ('\u{a692}',
-        '\u{a693}'), ('\u{a693}', '\u{a692}'), ('\u{a694}', '\u{a695}'),
-        ('\u{a695}', '\u{a694}'), ('\u{a696}', '\u{a697}'), ('\u{a697}',
-        '\u{a696}'), ('\u{a698}', '\u{a699}'), ('\u{a699}', '\u{a698}'),
-        ('\u{a69a}', '\u{a69b}'), ('\u{a69b}', '\u{a69a}'), ('\u{a722}',
-        '\u{a723}'), ('\u{a723}', '\u{a722}'), ('\u{a724}', '\u{a725}'),
-        ('\u{a725}', '\u{a724}'), ('\u{a726}', '\u{a727}'), ('\u{a727}',
-        '\u{a726}'), ('\u{a728}', '\u{a729}'), ('\u{a729}', '\u{a728}'),
-        ('\u{a72a}', '\u{a72b}'), ('\u{a72b}', '\u{a72a}'), ('\u{a72c}',
-        '\u{a72d}'), ('\u{a72d}', '\u{a72c}'), ('\u{a72e}', '\u{a72f}'),
-        ('\u{a72f}', '\u{a72e}'), ('\u{a732}', '\u{a733}'), ('\u{a733}',
-        '\u{a732}'), ('\u{a734}', '\u{a735}'), ('\u{a735}', '\u{a734}'),
-        ('\u{a736}', '\u{a737}'), ('\u{a737}', '\u{a736}'), ('\u{a738}',
-        '\u{a739}'), ('\u{a739}', '\u{a738}'), ('\u{a73a}', '\u{a73b}'),
-        ('\u{a73b}', '\u{a73a}'), ('\u{a73c}', '\u{a73d}'), ('\u{a73d}',
-        '\u{a73c}'), ('\u{a73e}', '\u{a73f}'), ('\u{a73f}', '\u{a73e}'),
-        ('\u{a740}', '\u{a741}'), ('\u{a741}', '\u{a740}'), ('\u{a742}',
-        '\u{a743}'), ('\u{a743}', '\u{a742}'), ('\u{a744}', '\u{a745}'),
-        ('\u{a745}', '\u{a744}'), ('\u{a746}', '\u{a747}'), ('\u{a747}',
-        '\u{a746}'), ('\u{a748}', '\u{a749}'), ('\u{a749}', '\u{a748}'),
-        ('\u{a74a}', '\u{a74b}'), ('\u{a74b}', '\u{a74a}'), ('\u{a74c}',
-        '\u{a74d}'), ('\u{a74d}', '\u{a74c}'), ('\u{a74e}', '\u{a74f}'),
-        ('\u{a74f}', '\u{a74e}'), ('\u{a750}', '\u{a751}'), ('\u{a751}',
-        '\u{a750}'), ('\u{a752}', '\u{a753}'), ('\u{a753}', '\u{a752}'),
-        ('\u{a754}', '\u{a755}'), ('\u{a755}', '\u{a754}'), ('\u{a756}',
-        '\u{a757}'), ('\u{a757}', '\u{a756}'), ('\u{a758}', '\u{a759}'),
-        ('\u{a759}', '\u{a758}'), ('\u{a75a}', '\u{a75b}'), ('\u{a75b}',
-        '\u{a75a}'), ('\u{a75c}', '\u{a75d}'), ('\u{a75d}', '\u{a75c}'),
-        ('\u{a75e}', '\u{a75f}'), ('\u{a75f}', '\u{a75e}'), ('\u{a760}',
-        '\u{a761}'), ('\u{a761}', '\u{a760}'), ('\u{a762}', '\u{a763}'),
-        ('\u{a763}', '\u{a762}'), ('\u{a764}', '\u{a765}'), ('\u{a765}',
-        '\u{a764}'), ('\u{a766}', '\u{a767}'), ('\u{a767}', '\u{a766}'),
-        ('\u{a768}', '\u{a769}'), ('\u{a769}', '\u{a768}'), ('\u{a76a}',
-        '\u{a76b}'), ('\u{a76b}', '\u{a76a}'), ('\u{a76c}', '\u{a76d}'),
-        ('\u{a76d}', '\u{a76c}'), ('\u{a76e}', '\u{a76f}'), ('\u{a76f}',
-        '\u{a76e}'), ('\u{a779}', '\u{a77a}'), ('\u{a77a}', '\u{a779}'),
-        ('\u{a77b}', '\u{a77c}'), ('\u{a77c}', '\u{a77b}'), ('\u{a77d}',
-        '\u{1d79}'), ('\u{a77e}', '\u{a77f}'), ('\u{a77f}', '\u{a77e}'),
-        ('\u{a780}', '\u{a781}'), ('\u{a781}', '\u{a780}'), ('\u{a782}',
-        '\u{a783}'), ('\u{a783}', '\u{a782}'), ('\u{a784}', '\u{a785}'),
-        ('\u{a785}', '\u{a784}'), ('\u{a786}', '\u{a787}'), ('\u{a787}',
-        '\u{a786}'), ('\u{a78b}', '\u{a78c}'), ('\u{a78c}', '\u{a78b}'),
-        ('\u{a78d}', '\u{265}'), ('\u{a790}', '\u{a791}'), ('\u{a791}',
-        '\u{a790}'), ('\u{a792}', '\u{a793}'), ('\u{a793}', '\u{a792}'),
-        ('\u{a796}', '\u{a797}'), ('\u{a797}', '\u{a796}'), ('\u{a798}',
-        '\u{a799}'), ('\u{a799}', '\u{a798}'), ('\u{a79a}', '\u{a79b}'),
-        ('\u{a79b}', '\u{a79a}'), ('\u{a79c}', '\u{a79d}'), ('\u{a79d}',
-        '\u{a79c}'), ('\u{a79e}', '\u{a79f}'), ('\u{a79f}', '\u{a79e}'),
-        ('\u{a7a0}', '\u{a7a1}'), ('\u{a7a1}', '\u{a7a0}'), ('\u{a7a2}',
-        '\u{a7a3}'), ('\u{a7a3}', '\u{a7a2}'), ('\u{a7a4}', '\u{a7a5}'),
-        ('\u{a7a5}', '\u{a7a4}'), ('\u{a7a6}', '\u{a7a7}'), ('\u{a7a7}',
-        '\u{a7a6}'), ('\u{a7a8}', '\u{a7a9}'), ('\u{a7a9}', '\u{a7a8}'),
-        ('\u{a7aa}', '\u{266}'), ('\u{a7ab}', '\u{25c}'), ('\u{a7ac}',
-        '\u{261}'), ('\u{a7ad}', '\u{26c}'), ('\u{a7ae}', '\u{26a}'),
-        ('\u{a7b0}', '\u{29e}'), ('\u{a7b1}', '\u{287}'), ('\u{a7b2}',
-        '\u{29d}'), ('\u{a7b3}', '\u{ab53}'), ('\u{a7b4}', '\u{a7b5}'),
-        ('\u{a7b5}', '\u{a7b4}'), ('\u{a7b6}', '\u{a7b7}'), ('\u{a7b7}',
-        '\u{a7b6}'), ('\u{ab53}', '\u{a7b3}'), ('\u{ab70}', '\u{13a0}'),
-        ('\u{ab71}', '\u{13a1}'), ('\u{ab72}', '\u{13a2}'), ('\u{ab73}',
-        '\u{13a3}'), ('\u{ab74}', '\u{13a4}'), ('\u{ab75}', '\u{13a5}'),
-        ('\u{ab76}', '\u{13a6}'), ('\u{ab77}', '\u{13a7}'), ('\u{ab78}',
-        '\u{13a8}'), ('\u{ab79}', '\u{13a9}'), ('\u{ab7a}', '\u{13aa}'),
-        ('\u{ab7b}', '\u{13ab}'), ('\u{ab7c}', '\u{13ac}'), ('\u{ab7d}',
-        '\u{13ad}'), ('\u{ab7e}', '\u{13ae}'), ('\u{ab7f}', '\u{13af}'),
-        ('\u{ab80}', '\u{13b0}'), ('\u{ab81}', '\u{13b1}'), ('\u{ab82}',
-        '\u{13b2}'), ('\u{ab83}', '\u{13b3}'), ('\u{ab84}', '\u{13b4}'),
-        ('\u{ab85}', '\u{13b5}'), ('\u{ab86}', '\u{13b6}'), ('\u{ab87}',
-        '\u{13b7}'), ('\u{ab88}', '\u{13b8}'), ('\u{ab89}', '\u{13b9}'),
-        ('\u{ab8a}', '\u{13ba}'), ('\u{ab8b}', '\u{13bb}'), ('\u{ab8c}',
-        '\u{13bc}'), ('\u{ab8d}', '\u{13bd}'), ('\u{ab8e}', '\u{13be}'),
-        ('\u{ab8f}', '\u{13bf}'), ('\u{ab90}', '\u{13c0}'), ('\u{ab91}',
-        '\u{13c1}'), ('\u{ab92}', '\u{13c2}'), ('\u{ab93}', '\u{13c3}'),
-        ('\u{ab94}', '\u{13c4}'), ('\u{ab95}', '\u{13c5}'), ('\u{ab96}',
-        '\u{13c6}'), ('\u{ab97}', '\u{13c7}'), ('\u{ab98}', '\u{13c8}'),
-        ('\u{ab99}', '\u{13c9}'), ('\u{ab9a}', '\u{13ca}'), ('\u{ab9b}',
-        '\u{13cb}'), ('\u{ab9c}', '\u{13cc}'), ('\u{ab9d}', '\u{13cd}'),
-        ('\u{ab9e}', '\u{13ce}'), ('\u{ab9f}', '\u{13cf}'), ('\u{aba0}',
-        '\u{13d0}'), ('\u{aba1}', '\u{13d1}'), ('\u{aba2}', '\u{13d2}'),
-        ('\u{aba3}', '\u{13d3}'), ('\u{aba4}', '\u{13d4}'), ('\u{aba5}',
-        '\u{13d5}'), ('\u{aba6}', '\u{13d6}'), ('\u{aba7}', '\u{13d7}'),
-        ('\u{aba8}', '\u{13d8}'), ('\u{aba9}', '\u{13d9}'), ('\u{abaa}',
-        '\u{13da}'), ('\u{abab}', '\u{13db}'), ('\u{abac}', '\u{13dc}'),
-        ('\u{abad}', '\u{13dd}'), ('\u{abae}', '\u{13de}'), ('\u{abaf}',
-        '\u{13df}'), ('\u{abb0}', '\u{13e0}'), ('\u{abb1}', '\u{13e1}'),
-        ('\u{abb2}', '\u{13e2}'), ('\u{abb3}', '\u{13e3}'), ('\u{abb4}',
-        '\u{13e4}'), ('\u{abb5}', '\u{13e5}'), ('\u{abb6}', '\u{13e6}'),
-        ('\u{abb7}', '\u{13e7}'), ('\u{abb8}', '\u{13e8}'), ('\u{abb9}',
-        '\u{13e9}'), ('\u{abba}', '\u{13ea}'), ('\u{abbb}', '\u{13eb}'),
-        ('\u{abbc}', '\u{13ec}'), ('\u{abbd}', '\u{13ed}'), ('\u{abbe}',
-        '\u{13ee}'), ('\u{abbf}', '\u{13ef}'), ('\u{ff21}', '\u{ff41}'),
-        ('\u{ff22}', '\u{ff42}'), ('\u{ff23}', '\u{ff43}'), ('\u{ff24}',
-        '\u{ff44}'), ('\u{ff25}', '\u{ff45}'), ('\u{ff26}', '\u{ff46}'),
-        ('\u{ff27}', '\u{ff47}'), ('\u{ff28}', '\u{ff48}'), ('\u{ff29}',
-        '\u{ff49}'), ('\u{ff2a}', '\u{ff4a}'), ('\u{ff2b}', '\u{ff4b}'),
-        ('\u{ff2c}', '\u{ff4c}'), ('\u{ff2d}', '\u{ff4d}'), ('\u{ff2e}',
-        '\u{ff4e}'), ('\u{ff2f}', '\u{ff4f}'), ('\u{ff30}', '\u{ff50}'),
-        ('\u{ff31}', '\u{ff51}'), ('\u{ff32}', '\u{ff52}'), ('\u{ff33}',
-        '\u{ff53}'), ('\u{ff34}', '\u{ff54}'), ('\u{ff35}', '\u{ff55}'),
-        ('\u{ff36}', '\u{ff56}'), ('\u{ff37}', '\u{ff57}'), ('\u{ff38}',
-        '\u{ff58}'), ('\u{ff39}', '\u{ff59}'), ('\u{ff3a}', '\u{ff5a}'),
-        ('\u{ff41}', '\u{ff21}'), ('\u{ff42}', '\u{ff22}'), ('\u{ff43}',
-        '\u{ff23}'), ('\u{ff44}', '\u{ff24}'), ('\u{ff45}', '\u{ff25}'),
-        ('\u{ff46}', '\u{ff26}'), ('\u{ff47}', '\u{ff27}'), ('\u{ff48}',
-        '\u{ff28}'), ('\u{ff49}', '\u{ff29}'), ('\u{ff4a}', '\u{ff2a}'),
-        ('\u{ff4b}', '\u{ff2b}'), ('\u{ff4c}', '\u{ff2c}'), ('\u{ff4d}',
-        '\u{ff2d}'), ('\u{ff4e}', '\u{ff2e}'), ('\u{ff4f}', '\u{ff2f}'),
-        ('\u{ff50}', '\u{ff30}'), ('\u{ff51}', '\u{ff31}'), ('\u{ff52}',
-        '\u{ff32}'), ('\u{ff53}', '\u{ff33}'), ('\u{ff54}', '\u{ff34}'),
-        ('\u{ff55}', '\u{ff35}'), ('\u{ff56}', '\u{ff36}'), ('\u{ff57}',
-        '\u{ff37}'), ('\u{ff58}', '\u{ff38}'), ('\u{ff59}', '\u{ff39}'),
-        ('\u{ff5a}', '\u{ff3a}'), ('\u{10400}', '\u{10428}'), ('\u{10401}',
-        '\u{10429}'), ('\u{10402}', '\u{1042a}'), ('\u{10403}', '\u{1042b}'),
-        ('\u{10404}', '\u{1042c}'), ('\u{10405}', '\u{1042d}'), ('\u{10406}',
-        '\u{1042e}'), ('\u{10407}', '\u{1042f}'), ('\u{10408}', '\u{10430}'),
-        ('\u{10409}', '\u{10431}'), ('\u{1040a}', '\u{10432}'), ('\u{1040b}',
-        '\u{10433}'), ('\u{1040c}', '\u{10434}'), ('\u{1040d}', '\u{10435}'),
-        ('\u{1040e}', '\u{10436}'), ('\u{1040f}', '\u{10437}'), ('\u{10410}',
-        '\u{10438}'), ('\u{10411}', '\u{10439}'), ('\u{10412}', '\u{1043a}'),
-        ('\u{10413}', '\u{1043b}'), ('\u{10414}', '\u{1043c}'), ('\u{10415}',
-        '\u{1043d}'), ('\u{10416}', '\u{1043e}'), ('\u{10417}', '\u{1043f}'),
-        ('\u{10418}', '\u{10440}'), ('\u{10419}', '\u{10441}'), ('\u{1041a}',
-        '\u{10442}'), ('\u{1041b}', '\u{10443}'), ('\u{1041c}', '\u{10444}'),
-        ('\u{1041d}', '\u{10445}'), ('\u{1041e}', '\u{10446}'), ('\u{1041f}',
-        '\u{10447}'), ('\u{10420}', '\u{10448}'), ('\u{10421}', '\u{10449}'),
-        ('\u{10422}', '\u{1044a}'), ('\u{10423}', '\u{1044b}'), ('\u{10424}',
-        '\u{1044c}'), ('\u{10425}', '\u{1044d}'), ('\u{10426}', '\u{1044e}'),
-        ('\u{10427}', '\u{1044f}'), ('\u{10428}', '\u{10400}'), ('\u{10429}',
-        '\u{10401}'), ('\u{1042a}', '\u{10402}'), ('\u{1042b}', '\u{10403}'),
-        ('\u{1042c}', '\u{10404}'), ('\u{1042d}', '\u{10405}'), ('\u{1042e}',
-        '\u{10406}'), ('\u{1042f}', '\u{10407}'), ('\u{10430}', '\u{10408}'),
-        ('\u{10431}', '\u{10409}'), ('\u{10432}', '\u{1040a}'), ('\u{10433}',
-        '\u{1040b}'), ('\u{10434}', '\u{1040c}'), ('\u{10435}', '\u{1040d}'),
-        ('\u{10436}', '\u{1040e}'), ('\u{10437}', '\u{1040f}'), ('\u{10438}',
-        '\u{10410}'), ('\u{10439}', '\u{10411}'), ('\u{1043a}', '\u{10412}'),
-        ('\u{1043b}', '\u{10413}'), ('\u{1043c}', '\u{10414}'), ('\u{1043d}',
-        '\u{10415}'), ('\u{1043e}', '\u{10416}'), ('\u{1043f}', '\u{10417}'),
-        ('\u{10440}', '\u{10418}'), ('\u{10441}', '\u{10419}'), ('\u{10442}',
-        '\u{1041a}'), ('\u{10443}', '\u{1041b}'), ('\u{10444}', '\u{1041c}'),
-        ('\u{10445}', '\u{1041d}'), ('\u{10446}', '\u{1041e}'), ('\u{10447}',
-        '\u{1041f}'), ('\u{10448}', '\u{10420}'), ('\u{10449}', '\u{10421}'),
-        ('\u{1044a}', '\u{10422}'), ('\u{1044b}', '\u{10423}'), ('\u{1044c}',
-        '\u{10424}'), ('\u{1044d}', '\u{10425}'), ('\u{1044e}', '\u{10426}'),
-        ('\u{1044f}', '\u{10427}'), ('\u{104b0}', '\u{104d8}'), ('\u{104b1}',
-        '\u{104d9}'), ('\u{104b2}', '\u{104da}'), ('\u{104b3}', '\u{104db}'),
-        ('\u{104b4}', '\u{104dc}'), ('\u{104b5}', '\u{104dd}'), ('\u{104b6}',
-        '\u{104de}'), ('\u{104b7}', '\u{104df}'), ('\u{104b8}', '\u{104e0}'),
-        ('\u{104b9}', '\u{104e1}'), ('\u{104ba}', '\u{104e2}'), ('\u{104bb}',
-        '\u{104e3}'), ('\u{104bc}', '\u{104e4}'), ('\u{104bd}', '\u{104e5}'),
-        ('\u{104be}', '\u{104e6}'), ('\u{104bf}', '\u{104e7}'), ('\u{104c0}',
-        '\u{104e8}'), ('\u{104c1}', '\u{104e9}'), ('\u{104c2}', '\u{104ea}'),
-        ('\u{104c3}', '\u{104eb}'), ('\u{104c4}', '\u{104ec}'), ('\u{104c5}',
-        '\u{104ed}'), ('\u{104c6}', '\u{104ee}'), ('\u{104c7}', '\u{104ef}'),
-        ('\u{104c8}', '\u{104f0}'), ('\u{104c9}', '\u{104f1}'), ('\u{104ca}',
-        '\u{104f2}'), ('\u{104cb}', '\u{104f3}'), ('\u{104cc}', '\u{104f4}'),
-        ('\u{104cd}', '\u{104f5}'), ('\u{104ce}', '\u{104f6}'), ('\u{104cf}',
-        '\u{104f7}'), ('\u{104d0}', '\u{104f8}'), ('\u{104d1}', '\u{104f9}'),
-        ('\u{104d2}', '\u{104fa}'), ('\u{104d3}', '\u{104fb}'), ('\u{104d8}',
-        '\u{104b0}'), ('\u{104d9}', '\u{104b1}'), ('\u{104da}', '\u{104b2}'),
-        ('\u{104db}', '\u{104b3}'), ('\u{104dc}', '\u{104b4}'), ('\u{104dd}',
-        '\u{104b5}'), ('\u{104de}', '\u{104b6}'), ('\u{104df}', '\u{104b7}'),
-        ('\u{104e0}', '\u{104b8}'), ('\u{104e1}', '\u{104b9}'), ('\u{104e2}',
-        '\u{104ba}'), ('\u{104e3}', '\u{104bb}'), ('\u{104e4}', '\u{104bc}'),
-        ('\u{104e5}', '\u{104bd}'), ('\u{104e6}', '\u{104be}'), ('\u{104e7}',
-        '\u{104bf}'), ('\u{104e8}', '\u{104c0}'), ('\u{104e9}', '\u{104c1}'),
-        ('\u{104ea}', '\u{104c2}'), ('\u{104eb}', '\u{104c3}'), ('\u{104ec}',
-        '\u{104c4}'), ('\u{104ed}', '\u{104c5}'), ('\u{104ee}', '\u{104c6}'),
-        ('\u{104ef}', '\u{104c7}'), ('\u{104f0}', '\u{104c8}'), ('\u{104f1}',
-        '\u{104c9}'), ('\u{104f2}', '\u{104ca}'), ('\u{104f3}', '\u{104cb}'),
-        ('\u{104f4}', '\u{104cc}'), ('\u{104f5}', '\u{104cd}'), ('\u{104f6}',
-        '\u{104ce}'), ('\u{104f7}', '\u{104cf}'), ('\u{104f8}', '\u{104d0}'),
-        ('\u{104f9}', '\u{104d1}'), ('\u{104fa}', '\u{104d2}'), ('\u{104fb}',
-        '\u{104d3}'), ('\u{10c80}', '\u{10cc0}'), ('\u{10c81}', '\u{10cc1}'),
-        ('\u{10c82}', '\u{10cc2}'), ('\u{10c83}', '\u{10cc3}'), ('\u{10c84}',
-        '\u{10cc4}'), ('\u{10c85}', '\u{10cc5}'), ('\u{10c86}', '\u{10cc6}'),
-        ('\u{10c87}', '\u{10cc7}'), ('\u{10c88}', '\u{10cc8}'), ('\u{10c89}',
-        '\u{10cc9}'), ('\u{10c8a}', '\u{10cca}'), ('\u{10c8b}', '\u{10ccb}'),
-        ('\u{10c8c}', '\u{10ccc}'), ('\u{10c8d}', '\u{10ccd}'), ('\u{10c8e}',
-        '\u{10cce}'), ('\u{10c8f}', '\u{10ccf}'), ('\u{10c90}', '\u{10cd0}'),
-        ('\u{10c91}', '\u{10cd1}'), ('\u{10c92}', '\u{10cd2}'), ('\u{10c93}',
-        '\u{10cd3}'), ('\u{10c94}', '\u{10cd4}'), ('\u{10c95}', '\u{10cd5}'),
-        ('\u{10c96}', '\u{10cd6}'), ('\u{10c97}', '\u{10cd7}'), ('\u{10c98}',
-        '\u{10cd8}'), ('\u{10c99}', '\u{10cd9}'), ('\u{10c9a}', '\u{10cda}'),
-        ('\u{10c9b}', '\u{10cdb}'), ('\u{10c9c}', '\u{10cdc}'), ('\u{10c9d}',
-        '\u{10cdd}'), ('\u{10c9e}', '\u{10cde}'), ('\u{10c9f}', '\u{10cdf}'),
-        ('\u{10ca0}', '\u{10ce0}'), ('\u{10ca1}', '\u{10ce1}'), ('\u{10ca2}',
-        '\u{10ce2}'), ('\u{10ca3}', '\u{10ce3}'), ('\u{10ca4}', '\u{10ce4}'),
-        ('\u{10ca5}', '\u{10ce5}'), ('\u{10ca6}', '\u{10ce6}'), ('\u{10ca7}',
-        '\u{10ce7}'), ('\u{10ca8}', '\u{10ce8}'), ('\u{10ca9}', '\u{10ce9}'),
-        ('\u{10caa}', '\u{10cea}'), ('\u{10cab}', '\u{10ceb}'), ('\u{10cac}',
-        '\u{10cec}'), ('\u{10cad}', '\u{10ced}'), ('\u{10cae}', '\u{10cee}'),
-        ('\u{10caf}', '\u{10cef}'), ('\u{10cb0}', '\u{10cf0}'), ('\u{10cb1}',
-        '\u{10cf1}'), ('\u{10cb2}', '\u{10cf2}'), ('\u{10cc0}', '\u{10c80}'),
-        ('\u{10cc1}', '\u{10c81}'), ('\u{10cc2}', '\u{10c82}'), ('\u{10cc3}',
-        '\u{10c83}'), ('\u{10cc4}', '\u{10c84}'), ('\u{10cc5}', '\u{10c85}'),
-        ('\u{10cc6}', '\u{10c86}'), ('\u{10cc7}', '\u{10c87}'), ('\u{10cc8}',
-        '\u{10c88}'), ('\u{10cc9}', '\u{10c89}'), ('\u{10cca}', '\u{10c8a}'),
-        ('\u{10ccb}', '\u{10c8b}'), ('\u{10ccc}', '\u{10c8c}'), ('\u{10ccd}',
-        '\u{10c8d}'), ('\u{10cce}', '\u{10c8e}'), ('\u{10ccf}', '\u{10c8f}'),
-        ('\u{10cd0}', '\u{10c90}'), ('\u{10cd1}', '\u{10c91}'), ('\u{10cd2}',
-        '\u{10c92}'), ('\u{10cd3}', '\u{10c93}'), ('\u{10cd4}', '\u{10c94}'),
-        ('\u{10cd5}', '\u{10c95}'), ('\u{10cd6}', '\u{10c96}'), ('\u{10cd7}',
-        '\u{10c97}'), ('\u{10cd8}', '\u{10c98}'), ('\u{10cd9}', '\u{10c99}'),
-        ('\u{10cda}', '\u{10c9a}'), ('\u{10cdb}', '\u{10c9b}'), ('\u{10cdc}',
-        '\u{10c9c}'), ('\u{10cdd}', '\u{10c9d}'), ('\u{10cde}', '\u{10c9e}'),
-        ('\u{10cdf}', '\u{10c9f}'), ('\u{10ce0}', '\u{10ca0}'), ('\u{10ce1}',
-        '\u{10ca1}'), ('\u{10ce2}', '\u{10ca2}'), ('\u{10ce3}', '\u{10ca3}'),
-        ('\u{10ce4}', '\u{10ca4}'), ('\u{10ce5}', '\u{10ca5}'), ('\u{10ce6}',
-        '\u{10ca6}'), ('\u{10ce7}', '\u{10ca7}'), ('\u{10ce8}', '\u{10ca8}'),
-        ('\u{10ce9}', '\u{10ca9}'), ('\u{10cea}', '\u{10caa}'), ('\u{10ceb}',
-        '\u{10cab}'), ('\u{10cec}', '\u{10cac}'), ('\u{10ced}', '\u{10cad}'),
-        ('\u{10cee}', '\u{10cae}'), ('\u{10cef}', '\u{10caf}'), ('\u{10cf0}',
-        '\u{10cb0}'), ('\u{10cf1}', '\u{10cb1}'), ('\u{10cf2}', '\u{10cb2}'),
-        ('\u{118a0}', '\u{118c0}'), ('\u{118a1}', '\u{118c1}'), ('\u{118a2}',
-        '\u{118c2}'), ('\u{118a3}', '\u{118c3}'), ('\u{118a4}', '\u{118c4}'),
-        ('\u{118a5}', '\u{118c5}'), ('\u{118a6}', '\u{118c6}'), ('\u{118a7}',
-        '\u{118c7}'), ('\u{118a8}', '\u{118c8}'), ('\u{118a9}', '\u{118c9}'),
-        ('\u{118aa}', '\u{118ca}'), ('\u{118ab}', '\u{118cb}'), ('\u{118ac}',
-        '\u{118cc}'), ('\u{118ad}', '\u{118cd}'), ('\u{118ae}', '\u{118ce}'),
-        ('\u{118af}', '\u{118cf}'), ('\u{118b0}', '\u{118d0}'), ('\u{118b1}',
-        '\u{118d1}'), ('\u{118b2}', '\u{118d2}'), ('\u{118b3}', '\u{118d3}'),
-        ('\u{118b4}', '\u{118d4}'), ('\u{118b5}', '\u{118d5}'), ('\u{118b6}',
-        '\u{118d6}'), ('\u{118b7}', '\u{118d7}'), ('\u{118b8}', '\u{118d8}'),
-        ('\u{118b9}', '\u{118d9}'), ('\u{118ba}', '\u{118da}'), ('\u{118bb}',
-        '\u{118db}'), ('\u{118bc}', '\u{118dc}'), ('\u{118bd}', '\u{118dd}'),
-        ('\u{118be}', '\u{118de}'), ('\u{118bf}', '\u{118df}'), ('\u{118c0}',
-        '\u{118a0}'), ('\u{118c1}', '\u{118a1}'), ('\u{118c2}', '\u{118a2}'),
-        ('\u{118c3}', '\u{118a3}'), ('\u{118c4}', '\u{118a4}'), ('\u{118c5}',
-        '\u{118a5}'), ('\u{118c6}', '\u{118a6}'), ('\u{118c7}', '\u{118a7}'),
-        ('\u{118c8}', '\u{118a8}'), ('\u{118c9}', '\u{118a9}'), ('\u{118ca}',
-        '\u{118aa}'), ('\u{118cb}', '\u{118ab}'), ('\u{118cc}', '\u{118ac}'),
-        ('\u{118cd}', '\u{118ad}'), ('\u{118ce}', '\u{118ae}'), ('\u{118cf}',
-        '\u{118af}'), ('\u{118d0}', '\u{118b0}'), ('\u{118d1}', '\u{118b1}'),
-        ('\u{118d2}', '\u{118b2}'), ('\u{118d3}', '\u{118b3}'), ('\u{118d4}',
-        '\u{118b4}'), ('\u{118d5}', '\u{118b5}'), ('\u{118d6}', '\u{118b6}'),
-        ('\u{118d7}', '\u{118b7}'), ('\u{118d8}', '\u{118b8}'), ('\u{118d9}',
-        '\u{118b9}'), ('\u{118da}', '\u{118ba}'), ('\u{118db}', '\u{118bb}'),
-        ('\u{118dc}', '\u{118bc}'), ('\u{118dd}', '\u{118bd}'), ('\u{118de}',
-        '\u{118be}'), ('\u{118df}', '\u{118bf}'), ('\u{1e900}', '\u{1e922}'),
-        ('\u{1e901}', '\u{1e923}'), ('\u{1e902}', '\u{1e924}'), ('\u{1e903}',
-        '\u{1e925}'), ('\u{1e904}', '\u{1e926}'), ('\u{1e905}', '\u{1e927}'),
-        ('\u{1e906}', '\u{1e928}'), ('\u{1e907}', '\u{1e929}'), ('\u{1e908}',
-        '\u{1e92a}'), ('\u{1e909}', '\u{1e92b}'), ('\u{1e90a}', '\u{1e92c}'),
-        ('\u{1e90b}', '\u{1e92d}'), ('\u{1e90c}', '\u{1e92e}'), ('\u{1e90d}',
-        '\u{1e92f}'), ('\u{1e90e}', '\u{1e930}'), ('\u{1e90f}', '\u{1e931}'),
-        ('\u{1e910}', '\u{1e932}'), ('\u{1e911}', '\u{1e933}'), ('\u{1e912}',
-        '\u{1e934}'), ('\u{1e913}', '\u{1e935}'), ('\u{1e914}', '\u{1e936}'),
-        ('\u{1e915}', '\u{1e937}'), ('\u{1e916}', '\u{1e938}'), ('\u{1e917}',
-        '\u{1e939}'), ('\u{1e918}', '\u{1e93a}'), ('\u{1e919}', '\u{1e93b}'),
-        ('\u{1e91a}', '\u{1e93c}'), ('\u{1e91b}', '\u{1e93d}'), ('\u{1e91c}',
-        '\u{1e93e}'), ('\u{1e91d}', '\u{1e93f}'), ('\u{1e91e}', '\u{1e940}'),
-        ('\u{1e91f}', '\u{1e941}'), ('\u{1e920}', '\u{1e942}'), ('\u{1e921}',
-        '\u{1e943}'), ('\u{1e922}', '\u{1e900}'), ('\u{1e923}', '\u{1e901}'),
-        ('\u{1e924}', '\u{1e902}'), ('\u{1e925}', '\u{1e903}'), ('\u{1e926}',
-        '\u{1e904}'), ('\u{1e927}', '\u{1e905}'), ('\u{1e928}', '\u{1e906}'),
-        ('\u{1e929}', '\u{1e907}'), ('\u{1e92a}', '\u{1e908}'), ('\u{1e92b}',
-        '\u{1e909}'), ('\u{1e92c}', '\u{1e90a}'), ('\u{1e92d}', '\u{1e90b}'),
-        ('\u{1e92e}', '\u{1e90c}'), ('\u{1e92f}', '\u{1e90d}'), ('\u{1e930}',
-        '\u{1e90e}'), ('\u{1e931}', '\u{1e90f}'), ('\u{1e932}', '\u{1e910}'),
-        ('\u{1e933}', '\u{1e911}'), ('\u{1e934}', '\u{1e912}'), ('\u{1e935}',
-        '\u{1e913}'), ('\u{1e936}', '\u{1e914}'), ('\u{1e937}', '\u{1e915}'),
-        ('\u{1e938}', '\u{1e916}'), ('\u{1e939}', '\u{1e917}'), ('\u{1e93a}',
-        '\u{1e918}'), ('\u{1e93b}', '\u{1e919}'), ('\u{1e93c}', '\u{1e91a}'),
-        ('\u{1e93d}', '\u{1e91b}'), ('\u{1e93e}', '\u{1e91c}'), ('\u{1e93f}',
-        '\u{1e91d}'), ('\u{1e940}', '\u{1e91e}'), ('\u{1e941}', '\u{1e91f}'),
-        ('\u{1e942}', '\u{1e920}'), ('\u{1e943}', '\u{1e921}')
-    ];
+fn ages(canonical_age: &str) -> Result<AgeIter> {
+    const AGES: &'static [(&'static str, &'static [(char, char)])] = &[
+        ("V1_1", age::V1_1),
+        ("V2_0", age::V2_0),
+        ("V2_1", age::V2_1),
+        ("V3_0", age::V3_0),
+        ("V3_1", age::V3_1),
+        ("V3_2", age::V3_2),
+        ("V4_0", age::V4_0),
+        ("V4_1", age::V4_1),
+        ("V5_0", age::V5_0),
+        ("V5_1", age::V5_1),
+        ("V5_2", age::V5_2),
+        ("V6_0", age::V6_0),
+        ("V6_1", age::V6_1),
+        ("V6_2", age::V6_2),
+        ("V6_3", age::V6_3),
+        ("V7_0", age::V7_0),
+        ("V8_0", age::V8_0),
+        ("V9_0", age::V9_0),
+        ("V10_0", age::V10_0),
+    ];
+    assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
+    // AGES runs oldest-first: the prefix ending at the requested entry is that
+    // version's table plus all earlier ones. Unknown names are an error.
+    AGES.iter()
+        .position(|&(name, _)| name == canonical_age)
+        .map(|end| AgeIter { ages: &AGES[..end + 1] })
+        .ok_or(Error::PropertyValueNotFound)
+}
 
+impl Iterator for AgeIter {
+    type Item = &'static [(char, char)];
+
+    fn next(&mut self) -> Option<&'static [(char, char)]> {
+        // Peel the front entry off `ages` and keep the tail for later calls.
+        // Only the entry's range table is yielded; its name is dropped.
+        // An empty slice means the iteration is finished.
+        match self.ages.split_first() {
+            None => None,
+            Some((&(_, ranges), rest)) => { self.ages = rest; Some(ranges) }
+        }
+    }
 }
 
+#[cfg(test)]
+mod tests {
+    use super::{contains_simple_case_mapping, simple_fold};
+
+    #[test]
+    fn simple_fold_k() {
+        let xs: Vec<char> = simple_fold('k').unwrap().collect();
+        assert_eq!(xs, vec!['K', '\u{212A}']); // U+212A is KELVIN SIGN
+
+        let xs: Vec<char> = simple_fold('K').unwrap().collect();
+        assert_eq!(xs, vec!['k', '\u{212A}']);
+
+        let xs: Vec<char> = simple_fold('\u{212A}').unwrap().collect();
+        assert_eq!(xs, vec!['K', 'k']); // escape used: the literal looks like ASCII 'K'
+    }
+
+    #[test]
+    fn simple_fold_a() {
+        let xs: Vec<char> = simple_fold('a').unwrap().collect();
+        assert_eq!(xs, vec!['A']);
+
+        let xs: Vec<char> = simple_fold('A').unwrap().collect();
+        assert_eq!(xs, vec!['a']);
+    }
+
+    #[test]
+    fn simple_fold_empty() { // Err appears to carry the next char that does have a mapping
+        assert_eq!(Some('A'), simple_fold('?').unwrap_err());
+        assert_eq!(Some('A'), simple_fold('@').unwrap_err());
+        assert_eq!(Some('a'), simple_fold('[').unwrap_err());
+        assert_eq!(Some('Ⰰ'), simple_fold('☃').unwrap_err());
+    }
+
+    #[test]
+    fn simple_fold_max() {
+        assert_eq!(None, simple_fold('\u{10FFFE}').unwrap_err());
+        assert_eq!(None, simple_fold('\u{10FFFF}').unwrap_err());
+    }
+
+    #[test]
+    fn range_contains() {
+        assert!(contains_simple_case_mapping('A', 'A'));
+        assert!(contains_simple_case_mapping('Z', 'Z'));
+        assert!(contains_simple_case_mapping('A', 'Z'));
+        assert!(contains_simple_case_mapping('@', 'A'));
+        assert!(contains_simple_case_mapping('Z', '['));
+        assert!(contains_simple_case_mapping('☃', 'Ⰰ'));
+
+        assert!(!contains_simple_case_mapping('[', '['));
+        assert!(!contains_simple_case_mapping('[', '`'));
+
+        assert!(!contains_simple_case_mapping('☃', '☃'));
+    }
+}
diff --git a/regex-syntax/src/unicode_tables/age.rs b/regex-syntax/src/unicode_tables/age.rs
new file mode 100644
index 0000000000..afba3d3ff4
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/age.rs
@@ -0,0 +1,424 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate age tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("V10_0", V10_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1),
+  ("V3_0", V3_0), ("V3_1", V3_1), ("V3_2", V3_2), ("V4_0", V4_0),
+  ("V4_1", V4_1), ("V5_0", V5_0), ("V5_1", V5_1), ("V5_2", V5_2),
+  ("V6_0", V6_0), ("V6_1", V6_1), ("V6_2", V6_2), ("V6_3", V6_3),
+  ("V7_0", V7_0), ("V8_0", V8_0), ("V9_0", V9_0),
+];
+
+pub const V10_0: &'static [(char, char)] = &[
+  ('ࡠ', 'ࡪ'), ('ৼ', '৽'), ('ૺ', '૿'), ('ഀ', 'ഀ'),
+  ('഻', '഼'), ('᳷', '᳷'), ('᷶', '᷹'), ('₿', '₿'),
+  ('⏿', '⏿'), ('⯒', '⯒'), ('⹅', '⹉'), ('ㄮ', 'ㄮ'),
+  ('鿖', '鿪'), ('𐌭', '𐌯'), ('𑨀', '𑩇'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪜'), ('𑪞', '𑪢'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵇'),
+  ('𑵐', '𑵙'), ('𖿡', '𖿡'), ('𛀂', '𛄞'), ('𛅰', '𛋻'),
+  ('🉠', '🉥'), ('🛓', '🛔'), ('🛷', '🛸'), ('🤀', '🤋'),
+  ('🤟', '🤟'), ('🤨', '🤯'), ('🤱', '🤲'), ('🥌', '🥌'),
+  ('🥟', '🥫'), ('🦒', '🦗'), ('🧐', '🧦'), ('𬺰', '𮯠'),
+];
+
+pub const V1_1: &'static [(char, char)] = &[ // NFC-unstable code points are written as \u{...} escapes
+  ('\u{0}', 'ǵ'), ('Ǻ', 'ȗ'), ('ɐ', 'ʨ'), ('ʰ', '˞'), ('ˠ', '˩'),
+  ('̀', 'ͅ'), ('͠', '͡'), ('\u{374}', '͵'), ('ͺ', 'ͺ'), ('\u{37e}', '\u{37e}'),
+  ('΄', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ώ'), ('ϐ', 'ϖ'),
+  ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'), ('Ϣ', 'ϳ'),
+  ('Ё', 'Ќ'), ('Ў', 'я'), ('ё', 'ќ'), ('ў', '҆'), ('Ґ', 'ӄ'),
+  ('Ӈ', 'ӈ'), ('Ӌ', 'ӌ'), ('Ӑ', 'ӫ'), ('Ӯ', 'ӵ'), ('Ӹ', 'ӹ'),
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '։'), ('ְ', 'ֹ'),
+  ('ֻ', '׃'), ('א', 'ת'), ('װ', '״'), ('،', '،'), ('؛', '؛'),
+  ('؟', '؟'), ('ء', 'غ'), ('ـ', 'ْ'), ('٠', '٭'), ('ٰ', 'ڷ'),
+  ('ں', 'ھ'), ('ۀ', 'ێ'), ('ې', 'ۭ'), ('۰', '۹'), ('ँ', 'ः'),
+  ('अ', 'ह'), ('़', '्'), ('ॐ', '॔'), ('\u{958}', '॰'),
+  ('ঁ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', '়'),
+  ('া', 'ৄ'), ('ে', 'ৈ'), ('ো', '্'), ('ৗ', 'ৗ'),
+  ('\u{9dc}', '\u{9dd}'), ('\u{9df}', 'ৣ'), ('০', '৺'), ('ਂ', 'ਂ'),
+  ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'),
+  ('ਲ', '\u{a33}'), ('ਵ', '\u{a36}'), ('ਸ', 'ਹ'), ('਼', '਼'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('\u{a59}', 'ੜ'),
+  ('\u{a5e}', '\u{a5e}'), ('੦', 'ੴ'), ('ઁ', 'ઃ'), ('અ', 'ઋ'),
+  ('ઍ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૠ'), ('૦', '૯'),
+  ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଶ', 'ହ'), ('଼', 'ୃ'),
+  ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'), ('\u{b5c}', '\u{b5d}'),
+  ('ୟ', 'ୡ'), ('୦', '୰'), ('ஂ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'வ'),
+  ('ஷ', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'), ('ொ', '்'),
+  ('ௗ', 'ௗ'), ('௧', '௲'), ('ఁ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'ళ'), ('వ', 'హ'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'),
+  ('ౠ', 'ౡ'), ('౦', '౯'), ('ಂ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಾ', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), ('ം', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ന'), ('പ', 'ഹ'),
+  ('ാ', 'ൃ'), ('െ', 'ൈ'), ('ൊ', '്'), ('ൗ', 'ൗ'),
+  ('ൠ', 'ൡ'), ('൦', '൯'), ('ก', 'ฺ'), ('฿', '๛'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໝ'), ('Ⴀ', 'Ⴥ'), ('ა', 'ჶ'),
+  ('჻', '჻'), ('ᄀ', 'ᅙ'), ('ᅟ', 'ᆢ'), ('ᆨ', 'ᇹ'),
+  ('Ḁ', 'ẚ'), ('Ạ', 'ỹ'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', '\u{1f7d}'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ῄ'), ('ῆ', '\u{1fd3}'), ('ῖ', '\u{1fdb}'), ('῝', '\u{1fef}'),
+  ('ῲ', 'ῴ'), ('ῶ', '῾'), ('\u{2000}', '\u{202e}'), ('‰', '⁆'),
+  ('\u{206a}', '⁰'), ('⁴', '₎'), ('₠', '₪'), ('⃐', '⃡'),
+  ('℀', 'ℸ'), ('⅓', 'ↂ'), ('←', '⇪'), ('∀', '⋱'),
+  ('⌀', '⌀'), ('⌂', '⍺'), ('␀', '␤'), ('⑀', '⑊'),
+  ('①', '⓪'), ('─', '▕'), ('■', '◯'), ('☀', '☓'),
+  ('☚', '♯'), ('✁', '✄'), ('✆', '✉'), ('✌', '✧'),
+  ('✩', '❋'), ('❍', '❍'), ('❏', '❒'), ('❖', '❖'),
+  ('❘', '❞'), ('❡', '❧'), ('❶', '➔'), ('➘', '➯'),
+  ('➱', '➾'), ('\u{3000}', '〷'), ('〿', '〿'), ('ぁ', 'ゔ'),
+  ('゙', 'ゞ'), ('ァ', 'ヾ'), ('ㄅ', 'ㄬ'), ('ㄱ', 'ㆎ'),
+  ('㆐', '㆟'), ('㈀', '㈜'), ('㈠', '㉃'), ('㉠', '㉻'),
+  ('㉿', '㊰'), ('㋀', '㋋'), ('㋐', '㋾'), ('㌀', '㍶'),
+  ('㍻', '㏝'), ('㏠', '㏾'), ('一', '龥'), ('\u{e000}', '\u{fa2d}'),
+  ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('ﬞ', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'),
+  ('\u{fb3e}', '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', 'ﮱ'),
+  ('ﯓ', '﴿'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'),
+  ('︠', '︣'), ('︰', '﹄'), ('﹉', '﹒'), ('﹔', '﹦'),
+  ('﹨', '﹫'), ('ﹰ', 'ﹲ'), ('ﹴ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('\u{feff}', '\u{feff}'), ('！', '～'), ('｡', 'ﾾ'), ('ￂ', 'ￇ'),
+  ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'), ('￠', '￦'),
+  ('￨', '￮'), ('�', '\u{ffff}'),
+];
+
+pub const V2_0: &'static [(char, char)] = &[ // NFC-unstable code points are written as \u{...} escapes
+  ('֑', '֡'), ('֣', '֯'), ('ׄ', 'ׄ'), ('ༀ', 'ཇ'), ('ཉ', '\u{f69}'),
+  ('ཱ', 'ྋ'), ('ྐ', 'ྕ'), ('ྗ', 'ྗ'), ('ྙ', 'ྭ'),
+  ('ྱ', 'ྷ'), ('\u{fb9}', '\u{fb9}'), ('ẛ', 'ẛ'), ('₫', '₫'),
+  ('가', '힣'), ('\u{1fffe}', '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'),
+  ('\u{3fffe}', '\u{3ffff}'), ('\u{4fffe}', '\u{4ffff}'),
+  ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}', '\u{6ffff}'),
+  ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
+  ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'),
+  ('\u{bfffe}', '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'),
+  ('\u{dfffe}', '\u{dffff}'), ('\u{efffe}', '\u{10ffff}'),
+];
+
+pub const V2_1: &'static [(char, char)] = &[
+  ('€', '€'), ('￼', '￼'),
+];
+
+pub const V3_0: &'static [(char, char)] = &[ // NFC-unstable code points are written as \u{...} escapes
+  ('Ƕ', 'ǹ'), ('Ș', 'ȟ'), ('Ȣ', 'ȳ'), ('ʩ', 'ʭ'), ('˟', '˟'),
+  ('˪', 'ˮ'), ('͆', '͎'), ('͢', '͢'), ('ϗ', 'ϗ'), ('ϛ', 'ϛ'),
+  ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('Ѐ', 'Ѐ'), ('Ѝ', 'Ѝ'),
+  ('ѐ', 'ѐ'), ('ѝ', 'ѝ'), ('҈', '҉'), ('Ҍ', 'ҏ'), ('Ӭ', 'ӭ'),
+  ('֊', '֊'), ('ٓ', 'ٕ'), ('ڸ', 'ڹ'), ('ڿ', 'ڿ'), ('ۏ', 'ۏ'),
+  ('ۺ', '۾'), ('܀', '܍'), ('\u{70f}', 'ܬ'), ('ܰ', '݊'), ('ހ', 'ް'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('ෲ', '෴'), ('ཪ', 'ཪ'),
+  ('ྖ', 'ྖ'), ('ྮ', 'ྰ'), ('ྸ', 'ྸ'), ('ྺ', 'ྼ'),
+  ('྾', '࿌'), ('࿏', '࿏'), ('က', 'အ'), ('ဣ', 'ဧ'),
+  ('ဩ', 'ဪ'), ('ာ', 'ဲ'), ('ံ', '္'), ('၀', 'ၙ'),
+  ('ሀ', 'ሆ'), ('ለ', 'ቆ'), ('ቈ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኆ'),
+  ('ኈ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኮ'), ('ኰ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዎ'), ('ዐ', 'ዖ'), ('ዘ', 'ዮ'), ('ደ', 'ጎ'),
+  ('ጐ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ጞ'), ('ጠ', 'ፆ'),
+  ('ፈ', 'ፚ'), ('፡', '፼'), ('Ꭰ', 'Ᏼ'), ('ᐁ', 'ᙶ'),
+  ('\u{1680}', '᚜'), ('ᚠ', 'ᛰ'), ('ក', 'ៜ'), ('០', '៩'),
+  ('᠀', '\u{180e}'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢩ'),
+  ('\u{202f}', '\u{202f}'), ('⁈', '⁍'), ('₭', '₯'), ('⃢', '⃣'),
+  ('ℹ', '℺'), ('Ↄ', 'Ↄ'), ('⇫', '⇳'), ('⌁', '⌁'),
+  ('⍻', '⍻'), ('⍽', '⎚'), ('␥', '␦'), ('◰', '◷'),
+  ('☙', '☙'), ('♰', '♱'), ('⠀', '⣿'), ('⺀', '⺙'),
+  ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'), ('〸', '〺'),
+  ('〾', '〾'), ('ㆠ', 'ㆷ'), ('㐀', '䶵'), ('ꀀ', 'ꒌ'),
+  ('꒐', '꒡'), ('꒤', '꒳'), ('꒵', '꓀'), ('꓂', '꓄'),
+  ('꓆', '꓆'), ('\u{fb1d}', '\u{fb1d}'), ('\u{fff9}', '\u{fffb}'),
+];
+
+pub const V3_1: &'static [(char, char)] = &[ // NFC-unstable code points are written as \u{...} escapes
+  ('ϴ', 'ϵ'), ('\u{fdd0}', '\u{fdef}'), ('𐌀', '𐌞'), ('𐌠', '𐌣'),
+  ('𐌰', '𐍊'), ('𐐀', '𐐥'), ('𐐨', '𐑍'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄪', '𝇝'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓀'), ('𝓂', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚣'), ('𝚨', '𝟉'), ('𝟎', '𝟿'),
+  ('𠀀', '𪛖'), ('\u{2f800}', '\u{2fa1d}'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const V3_2: &'static [(char, char)] = &[ // NFC-unstable and invisible code points are \u{...} escapes
+  ('Ƞ', 'Ƞ'), ('͏', '͏'), ('ͣ', 'ͯ'), ('Ϙ', 'ϙ'), ('϶', '϶'),
+  ('Ҋ', 'ҋ'), ('Ӆ', 'ӆ'), ('Ӊ', 'ӊ'), ('Ӎ', 'ӎ'), ('Ԁ', 'ԏ'),
+  ('ٮ', 'ٯ'), ('ޱ', 'ޱ'), ('ჷ', 'ჸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜶'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('⁇', '⁇'), ('⁎', '⁒'), ('⁗', '⁗'),
+  ('\u{205f}', '\u{2063}'), ('ⁱ', 'ⁱ'), ('₰', '₱'), ('⃤', '⃪'),
+  ('ℽ', '⅋'), ('⇴', '⇿'), ('⋲', '⋿'), ('⍼', '⍼'),
+  ('⎛', '⏎'), ('⓫', '⓾'), ('▖', '▟'), ('◸', '◿'),
+  ('☖', '☗'), ('♲', '♽'), ('⚀', '⚉'), ('❨', '❵'),
+  ('⟐', '⟫'), ('⟰', '⟿'), ('⤀', '⫿'), ('〻', '〽'),
+  ('ゕ', 'ゖ'), ('ゟ', '゠'), ('ヿ', 'ヿ'), ('ㇰ', 'ㇿ'),
+  ('㉑', '㉟'), ('㊱', '㊿'), ('꒢', '꒣'), ('꒴', '꒴'),
+  ('꓁', '꓁'), ('꓅', '꓅'), ('\u{fa30}', '\u{fa6a}'), ('﷼', '﷼'),
+  ('\u{fe00}', '\u{fe0f}'), ('﹅', '﹆'), ('ﹳ', 'ﹳ'), ('｟', '｠'),
+];
+
+pub const V4_0: &'static [(char, char)] = &[
+  ('ȡ', 'ȡ'), ('ȴ', 'ȶ'), ('ʮ', 'ʯ'), ('˯', '˿'), ('͐', '͗'),
+  ('͝', '͟'), ('Ϸ', 'ϻ'), ('\u{600}', '\u{603}'), ('؍', 'ؕ'),
+  ('ٖ', '٘'), ('ۮ', 'ۯ'), ('ۿ', 'ۿ'), ('ܭ', 'ܯ'), ('ݍ', 'ݏ'),
+  ('ऄ', 'ऄ'), ('ঽ', 'ঽ'), ('ਁ', 'ਁ'), ('ਃ', 'ਃ'),
+  ('ઌ', 'ઌ'), ('ૡ', 'ૣ'), ('૱', '૱'), ('ଵ', 'ଵ'),
+  ('ୱ', 'ୱ'), ('௳', '௺'), ('಼', 'ಽ'), ('៝', '៝'),
+  ('៰', '៹'), ('ᤀ', 'ᤜ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'),
+  ('᥀', '᥀'), ('᥄', 'ᥭ'), ('ᥰ', 'ᥴ'), ('᧠', '᧿'),
+  ('ᴀ', 'ᵫ'), ('⁓', '⁔'), ('℻', '℻'), ('⏏', '⏐'),
+  ('⓿', '⓿'), ('☔', '☕'), ('⚊', '⚑'), ('⚠', '⚡'),
+  ('⬀', '⬍'), ('㈝', '㈞'), ('㉐', '㉐'), ('㉼', '㉽'),
+  ('㋌', '㋏'), ('㍷', '㍺'), ('㏞', '㏟'), ('㏿', '㏿'),
+  ('䷀', '䷿'), ('﷽', '﷽'), ('﹇', '﹈'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐄀', '𐄂'), ('𐄇', '𐄳'),
+  ('𐄷', '𐄿'), ('𐎀', '𐎝'), ('𐎟', '𐎟'), ('𐐦', '𐐧'),
+  ('𐑎', '𐒝'), ('𐒠', '𐒩'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿'),
+  ('𝌀', '𝍖'), ('𝓁', '𝓁'), ('󠄀', '󠇯'),
+];
+
+pub const V4_1: &'static [(char, char)] = &[
+  ('ȷ', 'Ɂ'), ('͘', '͜'), ('ϼ', 'Ͽ'), ('Ӷ', 'ӷ'), ('֢', '֢'),
+  ('ׅ', 'ׇ'), ('؋', '؋'), ('؞', '؞'), ('ٙ', 'ٞ'), ('ݐ', 'ݭ'),
+  ('ॽ', 'ॽ'), ('ৎ', 'ৎ'), ('ஶ', 'ஶ'), ('௦', '௦'),
+  ('࿐', '࿑'), ('ჹ', 'ჺ'), ('ჼ', 'ჼ'), ('ሇ', 'ሇ'),
+  ('ቇ', 'ቇ'), ('ኇ', 'ኇ'), ('ኯ', 'ኯ'), ('ዏ', 'ዏ'),
+  ('ዯ', 'ዯ'), ('ጏ', 'ጏ'), ('ጟ', 'ጟ'), ('ፇ', 'ፇ'),
+  ('፟', '፠'), ('ᎀ', '᎙'), ('ᦀ', 'ᦩ'), ('ᦰ', 'ᧉ'),
+  ('᧐', '᧙'), ('᧞', '᧟'), ('ᨀ', 'ᨛ'), ('᨞', '᨟'),
+  ('ᵬ', '᷃'), ('⁕', '⁖'), ('⁘', '⁞'), ('ₐ', 'ₔ'),
+  ('₲', '₵'), ('⃫', '⃫'), ('ℼ', 'ℼ'), ('⅌', '⅌'),
+  ('⏑', '⏛'), ('☘', '☘'), ('♾', '♿'), ('⚒', '⚜'),
+  ('⚢', '⚱'), ('⟀', '⟆'), ('⬎', '⬓'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⲁ', '⳪'), ('⳹', 'ⴥ'), ('ⴰ', 'ⵥ'),
+  ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('⸀', '⸗'), ('⸜', '⸝'),
+  ('㇀', '㇏'), ('㉾', '㉾'), ('龦', '龻'), ('꜀', '꜖'),
+  ('ꠀ', '꠫'), ('並', '龎'), ('︐', '︙'), ('𐅀', '𐆊'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'), ('𐨀', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'),
+  ('𐨿', '𐩇'), ('𐩐', '𐩘'), ('𝈀', '𝉅'), ('𝚤', '𝚥'),
+];
+
+pub const V5_0: &'static [(char, char)] = &[
+  ('ɂ', 'ɏ'), ('ͻ', 'ͽ'), ('ӏ', 'ӏ'), ('Ӻ', 'ӿ'), ('Ԑ', 'ԓ'),
+  ('ֺ', 'ֺ'), ('߀', 'ߺ'), ('ॻ', 'ॼ'), ('ॾ', 'ॿ'), ('ೢ', 'ೣ'),
+  ('ೱ', 'ೲ'), ('ᬀ', 'ᭋ'), ('᭐', '᭼'), ('᷄', '᷊'),
+  ('᷾', '᷿'), ('⃬', '⃯'), ('⅍', 'ⅎ'), ('ↄ', 'ↄ'),
+  ('⏜', '⏧'), ('⚲', '⚲'), ('⟇', '⟊'), ('⬔', '⬚'),
+  ('⬠', '⬣'), ('Ⱡ', 'ⱬ'), ('ⱴ', 'ⱷ'), ('ꜗ', 'ꜚ'),
+  ('꜠', '꜡'), ('ꡀ', '꡷'), ('𐤀', '𐤙'), ('𐤟', '𐤟'),
+  ('𒀀', '𒍮'), ('𒐀', '𒑢'), ('𒑰', '𒑳'), ('𝍠', '𝍱'),
+  ('𝟊', '𝟋'),
+];
+
+pub const V5_1: &'static [(char, char)] = &[
+  ('Ͱ', 'ͳ'), ('Ͷ', 'ͷ'), ('Ϗ', 'Ϗ'), ('҇', '҇'), ('Ԕ', 'ԣ'),
+  ('؆', '؊'), ('ؖ', 'ؚ'), ('ػ', 'ؿ'), ('ݮ', 'ݿ'), ('ॱ', 'ॲ'),
+  ('ੑ', 'ੑ'), ('ੵ', 'ੵ'), ('ୄ', 'ୄ'), ('ୢ', 'ୣ'),
+  ('ௐ', 'ௐ'), ('ఽ', 'ఽ'), ('ౘ', 'ౙ'), ('ౢ', 'ౣ'),
+  ('౸', '౿'), ('ഽ', 'ഽ'), ('ൄ', 'ൄ'), ('ൢ', 'ൣ'),
+  ('൰', '൵'), ('൹', 'ൿ'), ('ཫ', 'ཬ'), ('࿎', '࿎'),
+  ('࿒', '࿔'), ('ဢ', 'ဢ'), ('ဨ', 'ဨ'), ('ါ', 'ါ'),
+  ('ဳ', 'ဵ'), ('်', 'ဿ'), ('ၚ', '႙'), ('႞', '႟'),
+  ('ᢪ', 'ᢪ'), ('ᮀ', '᮪'), ('ᮮ', '᮹'), ('ᰀ', '᰷'),
+  ('᰻', '᱉'), ('ᱍ', '᱿'), ('᷋', 'ᷦ'), ('ẜ', 'ẟ'),
+  ('Ỻ', 'ỿ'), ('\u{2064}', '\u{2064}'), ('⃰', '⃰'), ('⅏', '⅏'),
+  ('ↅ', 'ↈ'), ('⚝', '⚝'), ('⚳', '⚼'), ('⛀', '⛃'),
+  ('⟌', '⟌'), ('⟬', '⟯'), ('⬛', '⬟'), ('⬤', '⭌'),
+  ('⭐', '⭔'), ('Ɑ', 'Ɐ'), ('ⱱ', 'ⱳ'), ('ⱸ', 'ⱽ'),
+  ('ⷠ', 'ⷿ'), ('⸘', '⸛'), ('⸞', '⸰'), ('ㄭ', 'ㄭ'),
+  ('㇐', '㇣'), ('龼', '鿃'), ('ꔀ', 'ꘫ'), ('Ꙁ', 'ꙟ'),
+  ('Ꙣ', '꙳'), ('꙼', 'ꚗ'), ('ꜛ', 'ꜟ'), ('Ꜣ', 'ꞌ'),
+  ('ꟻ', 'ꟿ'), ('ꢀ', '꣄'), ('꣎', '꣙'), ('꤀', '꥓'),
+  ('꥟', '꥟'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'),
+  ('꩜', '꩟'), ('︤', '︦'), ('𐆐', '𐆛'), ('𐇐', '𐇽'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+  ('𝄩', '𝄩'), ('🀀', '🀫'), ('🀰', '🂓'),
+];
+
+pub const V5_2: &'static [(char, char)] = &[
+  ('Ԥ', 'ԥ'), ('ࠀ', '࠭'), ('࠰', '࠾'), ('ऀ', 'ऀ'),
+  ('ॎ', 'ॎ'), ('ॕ', 'ॕ'), ('ॹ', 'ॺ'), ('৻', '৻'),
+  ('࿕', '࿘'), ('ႚ', 'ႝ'), ('ᅚ', 'ᅞ'), ('ᆣ', 'ᆧ'),
+  ('ᇺ', 'ᇿ'), ('᐀', '᐀'), ('ᙷ', 'ᙿ'), ('ᢰ', 'ᣵ'),
+  ('ᦪ', 'ᦫ'), ('᧚', '᧚'), ('ᨠ', 'ᩞ'), ('᩠', '᩼'),
+  ('᩿', '᪉'), ('᪐', '᪙'), ('᪠', '᪭'), ('᳐', 'ᳲ'),
+  ('᷽', '᷽'), ('₶', '₸'), ('⅐', '⅒'), ('↉', '↉'),
+  ('⏨', '⏨'), ('⚞', '⚟'), ('⚽', '⚿'), ('⛄', '⛍'),
+  ('⛏', '⛡'), ('⛣', '⛣'), ('⛨', '⛿'), ('❗', '❗'),
+  ('⭕', '⭙'), ('Ɒ', 'Ɒ'), ('Ȿ', 'Ɀ'), ('Ⳬ', '⳱'),
+  ('⸱', '⸱'), ('㉄', '㉏'), ('鿄', '鿋'), ('ꓐ', '꓿'),
+  ('ꚠ', '꛷'), ('꠰', '꠹'), ('꣠', 'ꣻ'), ('ꥠ', 'ꥼ'),
+  ('ꦀ', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟'), ('ꩠ', 'ꩻ'),
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('恵', '舘'), ('𐡀', '𐡕'),
+  ('𐡗', '𐡟'), ('𐤚', '𐤛'), ('𐩠', '𐩿'), ('𐬀', '𐬵'),
+  ('𐬹', '𐭕'), ('𐭘', '𐭲'), ('𐭸', '𐭿'), ('𐰀', '𐱈'),
+  ('𐹠', '𐹾'), ('𑂀', '𑃁'), ('𓀀', '𓐮'), ('🄀', '🄊'),
+  ('🄐', '🄮'), ('🄱', '🄱'), ('🄽', '🄽'), ('🄿', '🄿'),
+  ('🅂', '🅂'), ('🅆', '🅆'), ('🅊', '🅎'), ('🅗', '🅗'),
+  ('🅟', '🅟'), ('🅹', '🅹'), ('🅻', '🅼'), ('🅿', '🅿'),
+  ('🆊', '🆍'), ('🆐', '🆐'), ('🈀', '🈀'), ('🈐', '🈱'),
+  ('🉀', '🉈'), ('𪜀', '𫜴'),
+];
+
+pub const V6_0: &'static [(char, char)] = &[
+  ('Ԧ', 'ԧ'), ('ؠ', 'ؠ'), ('ٟ', 'ٟ'), ('ࡀ', '࡛'), ('࡞', '࡞'),
+  ('ऺ', 'ऻ'), ('ॏ', 'ॏ'), ('ॖ', 'ॗ'), ('ॳ', 'ॷ'),
+  ('୲', '୷'), ('ഩ', 'ഩ'), ('ഺ', 'ഺ'), ('ൎ', 'ൎ'),
+  ('ྌ', 'ྏ'), ('࿙', '࿚'), ('፝', '፞'), ('ᯀ', '᯳'),
+  ('᯼', '᯿'), ('᷼', '᷼'), ('ₕ', 'ₜ'), ('₹', '₹'),
+  ('⏩', '⏳'), ('⛎', '⛎'), ('⛢', '⛢'), ('⛤', '⛧'),
+  ('✅', '✅'), ('✊', '✋'), ('✨', '✨'), ('❌', '❌'),
+  ('❎', '❎'), ('❓', '❕'), ('❟', '❠'), ('➕', '➗'),
+  ('➰', '➰'), ('➿', '➿'), ('⟎', '⟏'), ('⵰', '⵰'),
+  ('⵿', '⵿'), ('ㆸ', 'ㆺ'), ('Ꙡ', 'ꙡ'), ('Ɥ', 'ꞎ'),
+  ('Ꞑ', 'ꞑ'), ('Ꞡ', 'ꞩ'), ('ꟺ', 'ꟺ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+  ('﮲', '﯁'), ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𖠀', '𖨸'),
+  ('𛀀', '𛀁'), ('🂠', '🂮'), ('🂱', '🂾'), ('🃁', '🃏'),
+  ('🃑', '🃟'), ('🄰', '🄰'), ('🄲', '🄼'), ('🄾', '🄾'),
+  ('🅀', '🅁'), ('🅃', '🅅'), ('🅇', '🅉'), ('🅏', '🅖'),
+  ('🅘', '🅞'), ('🅠', '🅩'), ('🅰', '🅸'), ('🅺', '🅺'),
+  ('🅽', '🅾'), ('🆀', '🆉'), ('🆎', '🆏'), ('🆑', '🆚'),
+  ('🇦', '🇿'), ('🈁', '🈂'), ('🈲', '🈺'), ('🉐', '🉑'),
+  ('🌀', '🌠'), ('🌰', '🌵'), ('🌷', '🍼'), ('🎀', '🎓'),
+  ('🎠', '🏄'), ('🏆', '🏊'), ('🏠', '🏰'), ('🐀', '🐾'),
+  ('👀', '👀'), ('👂', '📷'), ('📹', '📼'), ('🔀', '🔽'),
+  ('🕐', '🕧'), ('🗻', '🗿'), ('😁', '😐'), ('😒', '😔'),
+  ('😖', '😖'), ('😘', '😘'), ('😚', '😚'), ('😜', '😞'),
+  ('😠', '😥'), ('😨', '😫'), ('😭', '😭'), ('😰', '😳'),
+  ('😵', '🙀'), ('🙅', '🙏'), ('🚀', '🛅'), ('🜀', '🝳'),
+  ('𫝀', '𫠝'),
+];
+
+pub const V6_1: &'static [(char, char)] = &[
+  ('֏', '֏'), ('\u{604}', '\u{604}'), ('ࢠ', 'ࢠ'), ('ࢢ', 'ࢬ'),
+  ('ࣤ', 'ࣾ'), ('૰', '૰'), ('ໞ', 'ໟ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ჽ', 'ჿ'), ('᮫', 'ᮭ'), ('ᮺ', 'ᮿ'),
+  ('᳀', '᳇'), ('ᳳ', 'ᳶ'), ('⟋', '⟋'), ('⟍', '⟍'),
+  ('Ⳳ', 'ⳳ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⵦ', 'ⵧ'),
+  ('⸲', '⸻'), ('鿌', '鿌'), ('ꙴ', 'ꙻ'), ('ꚟ', 'ꚟ'),
+  ('Ꞓ', 'ꞓ'), ('Ɦ', 'Ɦ'), ('ꟸ', 'ꟹ'), ('ꫠ', '꫶'),
+  ('郞', '隷'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𑃐', '𑃨'),
+  ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑅃'), ('𑆀', '𑇈'),
+  ('𑇐', '𑇙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𖼀', '𖽄'),
+  ('𖽐', '𖽾'), ('𖾏', '𖾟'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+  ('🅪', '🅫'), ('🕀', '🕃'), ('😀', '😀'), ('😑', '😑'),
+  ('😕', '😕'), ('😗', '😗'), ('😙', '😙'), ('😛', '😛'),
+  ('😟', '😟'), ('😦', '😧'), ('😬', '😬'), ('😮', '😯'),
+  ('😴', '😴'),
+];
+
+pub const V6_2: &'static [(char, char)] = &[
+  ('₺', '₺'),
+];
+
+pub const V6_3: &'static [(char, char)] = &[
+  ('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}'),
+];
+
+pub const V7_0: &'static [(char, char)] = &[
+  ('Ϳ', 'Ϳ'), ('Ԩ', 'ԯ'), ('֍', '֎'), ('\u{605}', '\u{605}'),
+  ('ࢡ', 'ࢡ'), ('ࢭ', 'ࢲ'), ('ࣿ', 'ࣿ'), ('ॸ', 'ॸ'),
+  ('ঀ', 'ঀ'), ('ఀ', 'ఀ'), ('ఴ', 'ఴ'), ('ಁ', 'ಁ'),
+  ('ഁ', 'ഁ'), ('෦', '෯'), ('ᛱ', 'ᛸ'), ('ᤝ', 'ᤞ'),
+  ('᪰', '᪾'), ('᳸', '᳹'), ('ᷧ', '᷵'), ('₻', '₽'),
+  ('⏴', '⏺'), ('✀', '✀'), ('⭍', '⭏'), ('⭚', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯑'),
+  ('⸼', '⹂'), ('Ꚙ', 'ꚝ'), ('ꞔ', 'ꞟ'), ('Ɜ', 'Ɬ'),
+  ('Ʞ', 'Ʇ'), ('ꟷ', 'ꟷ'), ('ꧠ', 'ꧾ'), ('ꩼ', 'ꩿ'),
+  ('ꬰ', 'ꭟ'), ('ꭤ', 'ꭥ'), ('︧', '︭'), ('𐆋', '𐆌'),
+  ('𐆠', '𐆠'), ('𐋠', '𐋻'), ('𐌟', '𐌟'), ('𐍐', '𐍺'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐕯', '𐕯'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐡠', '𐢞'), ('𐢧', '𐢯'),
+  ('𐪀', '𐪟'), ('𐫀', '𐫦'), ('𐫫', '𐫶'), ('𐮀', '𐮑'),
+  ('𐮙', '𐮜'), ('𐮩', '𐮯'), ('𑁿', '𑁿'), ('𑅐', '𑅶'),
+  ('𑇍', '𑇍'), ('𑇚', '𑇚'), ('𑇡', '𑇴'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈽'), ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌁', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍍'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑒀', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'),
+  ('𑖸', '𑗉'), ('𑘀', '𑙄'), ('𑙐', '𑙙'), ('𑢠', '𑣲'),
+  ('𑣿', '𑣿'), ('𑫀', '𑫸'), ('𒍯', '𒎘'), ('𒑣', '𒑮'),
+  ('𒑴', '𒑴'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'), ('𖬀', '𖭅'), ('𖭐', '𖭙'),
+  ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}'),
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'), ('🂿', '🂿'), ('🃠', '🃵'),
+  ('🄋', '🄌'), ('🌡', '🌬'), ('🌶', '🌶'), ('🍽', '🍽'),
+  ('🎔', '🎟'), ('🏅', '🏅'), ('🏋', '🏎'), ('🏔', '🏟'),
+  ('🏱', '🏷'), ('🐿', '🐿'), ('👁', '👁'), ('📸', '📸'),
+  ('📽', '📾'), ('🔾', '🔿'), ('🕄', '🕊'), ('🕨', '🕹'),
+  ('🕻', '🖣'), ('🖥', '🗺'), ('🙁', '🙂'), ('🙐', '🙿'),
+  ('🛆', '🛏'), ('🛠', '🛬'), ('🛰', '🛳'), ('🞀', '🟔'),
+  ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'),
+  ('🢐', '🢭'),
+];
+
+pub const V8_0: &'static [(char, char)] = &[
+  ('ࢳ', 'ࢴ'), ('ࣣ', 'ࣣ'), ('ૹ', 'ૹ'), ('ౚ', 'ౚ'),
+  ('ൟ', 'ൟ'), ('Ᏽ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('₾', '₾'),
+  ('↊', '↋'), ('⯬', '⯯'), ('鿍', '鿕'), ('ꚞ', 'ꚞ'),
+  ('ꞏ', 'ꞏ'), ('Ʝ', 'ꞷ'), ('꣼', 'ꣽ'), ('ꭠ', 'ꭣ'),
+  ('ꭰ', 'ꮿ'), ('︮', '︯'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐣻', '𐣿'), ('𐦼', '𐦽'), ('𐧀', '𐧏'), ('𐧒', '𐧿'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'), ('𑇉', '𑇌'),
+  ('𑇛', '𑇟'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊩'), ('𑌀', '𑌀'), ('𑍐', '𑍐'),
+  ('𑗊', '𑗝'), ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+  ('𒎙', '𒎙'), ('𒒀', '𒕃'), ('𔐀', '𔙆'), ('𝇞', '𝇨'),
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('🌭', '🌯'),
+  ('🍾', '🍿'), ('🏏', '🏓'), ('🏸', '🏿'), ('📿', '📿'),
+  ('🕋', '🕏'), ('🙃', '🙄'), ('🛐', '🛐'), ('🤐', '🤘'),
+  ('🦀', '🦄'), ('🧀', '🧀'), ('𫠠', '𬺡'),
+];
+
+pub const V9_0: &'static [(char, char)] = &[
+  ('ࢶ', 'ࢽ'), ('ࣔ', '\u{8e2}'), ('ಀ', 'ಀ'), ('൏', '൏'),
+  ('ൔ', 'ൖ'), ('൘', '൞'), ('൶', '൸'), ('ᲀ', 'ᲈ'),
+  ('᷻', '᷻'), ('⏻', '⏾'), ('⹃', '⹄'), ('Ɪ', 'Ɪ'),
+  ('ꣅ', 'ꣅ'), ('𐆍', '𐆎'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𑈾', '𑈾'), ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+  ('𑙠', '𑙬'), ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'),
+  ('𑱐', '𑱬'), ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'), ('🆛', '🆬'),
+  ('🈻', '🈻'), ('🕺', '🕺'), ('🖤', '🖤'), ('🛑', '🛒'),
+  ('🛴', '🛶'), ('🤙', '🤞'), ('🤠', '🤧'), ('🤰', '🤰'),
+  ('🤳', '🤾'), ('🥀', '🥋'), ('🥐', '🥞'), ('🦅', '🦑'),
+];
diff --git a/regex-syntax/src/unicode_tables/case_folding_simple.rs b/regex-syntax/src/unicode_tables/case_folding_simple.rs
new file mode 100644
index 0000000000..72ec79f0dc
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/case_folding_simple.rs
@@ -0,0 +1,662 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate case-folding-simple /home/andrew/tmp/ucd-10.0.0/ --chars --all-pairs
+//
+// ucd-generate is available on crates.io.
+
+pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
+  ('A', &['a']), ('B', &['b']), ('C', &['c']), ('D', &['d']), ('E', &['e']),
+  ('F', &['f']), ('G', &['g']), ('H', &['h']), ('I', &['i']), ('J', &['j']),
+  ('K', &['k', 'K', ]), ('L', &['l']), ('M', &['m']), ('N', &['n']), ('O', &[
+  'o']), ('P', &['p']), ('Q', &['q']), ('R', &['r']), ('S', &['s', 'ſ', ]),
+  ('T', &['t']), ('U', &['u']), ('V', &['v']), ('W', &['w']), ('X', &['x']),
+  ('Y', &['y']), ('Z', &['z']), ('a', &['A']), ('b', &['B']), ('c', &['C']),
+  ('d', &['D']), ('e', &['E']), ('f', &['F']), ('g', &['G']), ('h', &['H']),
+  ('i', &['I']), ('j', &['J']), ('k', &['K', 'K', ]), ('l', &['L']), ('m', &[
+  'M']), ('n', &['N']), ('o', &['O']), ('p', &['P']), ('q', &['Q']), ('r', &[
+  'R']), ('s', &['S', 'ſ', ]), ('t', &['T']), ('u', &['U']), ('v', &['V']),
+  ('w', &['W']), ('x', &['X']), ('y', &['Y']), ('z', &['Z']), ('µ', &['Μ',
+  'μ', ]), ('À', &['à']), ('Á', &['á']), ('Â', &['â']), ('Ã', &['ã'
+  ]), ('Ä', &['ä']), ('Å', &['å', 'Å', ]), ('Æ', &['æ']), ('Ç', &['ç'
+  ]), ('È', &['è']), ('É', &['é']), ('Ê', &['ê']), ('Ë', &['ë']),
+  ('Ì', &['ì']), ('Í', &['í']), ('Î', &['î']), ('Ï', &['ï']), ('Ð', &[
+  'ð']), ('Ñ', &['ñ']), ('Ò', &['ò']), ('Ó', &['ó']), ('Ô', &['ô']),
+  ('Õ', &['õ']), ('Ö', &['ö']), ('Ø', &['ø']), ('Ù', &['ù']), ('Ú', &[
+  'ú']), ('Û', &['û']), ('Ü', &['ü']), ('Ý', &['ý']), ('Þ', &['þ']),
+  ('ß', &['ẞ']), ('à', &['À']), ('á', &['Á']), ('â', &['Â']),
+  ('ã', &['Ã']), ('ä', &['Ä']), ('å', &['Å', 'Å', ]), ('æ', &['Æ']),
+  ('ç', &['Ç']), ('è', &['È']), ('é', &['É']), ('ê', &['Ê']), ('ë', &[
+  'Ë']), ('ì', &['Ì']), ('í', &['Í']), ('î', &['Î']), ('ï', &['Ï']),
+  ('ð', &['Ð']), ('ñ', &['Ñ']), ('ò', &['Ò']), ('ó', &['Ó']), ('ô', &[
+  'Ô']), ('õ', &['Õ']), ('ö', &['Ö']), ('ø', &['Ø']), ('ù', &['Ù']),
+  ('ú', &['Ú']), ('û', &['Û']), ('ü', &['Ü']), ('ý', &['Ý']), ('þ', &[
+  'Þ']), ('ÿ', &['Ÿ']), ('Ā', &['ā']), ('ā', &['Ā']), ('Ă', &['ă']),
+  ('ă', &['Ă']), ('Ą', &['ą']), ('ą', &['Ą']), ('Ć', &['ć']), ('ć', &[
+  'Ć']), ('Ĉ', &['ĉ']), ('ĉ', &['Ĉ']), ('Ċ', &['ċ']), ('ċ', &['Ċ']),
+  ('Č', &['č']), ('č', &['Č']), ('Ď', &['ď']), ('ď', &['Ď']), ('Đ', &[
+  'đ']), ('đ', &['Đ']), ('Ē', &['ē']), ('ē', &['Ē']), ('Ĕ', &['ĕ']),
+  ('ĕ', &['Ĕ']), ('Ė', &['ė']), ('ė', &['Ė']), ('Ę', &['ę']), ('ę', &[
+  'Ę']), ('Ě', &['ě']), ('ě', &['Ě']), ('Ĝ', &['ĝ']), ('ĝ', &['Ĝ']),
+  ('Ğ', &['ğ']), ('ğ', &['Ğ']), ('Ġ', &['ġ']), ('ġ', &['Ġ']), ('Ģ', &[
+  'ģ']), ('ģ', &['Ģ']), ('Ĥ', &['ĥ']), ('ĥ', &['Ĥ']), ('Ħ', &['ħ']),
+  ('ħ', &['Ħ']), ('Ĩ', &['ĩ']), ('ĩ', &['Ĩ']), ('Ī', &['ī']), ('ī', &[
+  'Ī']), ('Ĭ', &['ĭ']), ('ĭ', &['Ĭ']), ('Į', &['į']), ('į', &['Į']),
+  ('Ĳ', &['ĳ']), ('ĳ', &['Ĳ']), ('Ĵ', &['ĵ']), ('ĵ', &['Ĵ']), ('Ķ', &[
+  'ķ']), ('ķ', &['Ķ']), ('Ĺ', &['ĺ']), ('ĺ', &['Ĺ']), ('Ļ', &['ļ']),
+  ('ļ', &['Ļ']), ('Ľ', &['ľ']), ('ľ', &['Ľ']), ('Ŀ', &['ŀ']), ('ŀ', &[
+  'Ŀ']), ('Ł', &['ł']), ('ł', &['Ł']), ('Ń', &['ń']), ('ń', &['Ń']),
+  ('Ņ', &['ņ']), ('ņ', &['Ņ']), ('Ň', &['ň']), ('ň', &['Ň']), ('Ŋ', &[
+  'ŋ']), ('ŋ', &['Ŋ']), ('Ō', &['ō']), ('ō', &['Ō']), ('Ŏ', &['ŏ']),
+  ('ŏ', &['Ŏ']), ('Ő', &['ő']), ('ő', &['Ő']), ('Œ', &['œ']), ('œ', &[
+  'Œ']), ('Ŕ', &['ŕ']), ('ŕ', &['Ŕ']), ('Ŗ', &['ŗ']), ('ŗ', &['Ŗ']),
+  ('Ř', &['ř']), ('ř', &['Ř']), ('Ś', &['ś']), ('ś', &['Ś']), ('Ŝ', &[
+  'ŝ']), ('ŝ', &['Ŝ']), ('Ş', &['ş']), ('ş', &['Ş']), ('Š', &['š']),
+  ('š', &['Š']), ('Ţ', &['ţ']), ('ţ', &['Ţ']), ('Ť', &['ť']), ('ť', &[
+  'Ť']), ('Ŧ', &['ŧ']), ('ŧ', &['Ŧ']), ('Ũ', &['ũ']), ('ũ', &['Ũ']),
+  ('Ū', &['ū']), ('ū', &['Ū']), ('Ŭ', &['ŭ']), ('ŭ', &['Ŭ']), ('Ů', &[
+  'ů']), ('ů', &['Ů']), ('Ű', &['ű']), ('ű', &['Ű']), ('Ų', &['ų']),
+  ('ų', &['Ų']), ('Ŵ', &['ŵ']), ('ŵ', &['Ŵ']), ('Ŷ', &['ŷ']), ('ŷ', &[
+  'Ŷ']), ('Ÿ', &['ÿ']), ('Ź', &['ź']), ('ź', &['Ź']), ('Ż', &['ż']),
+  ('ż', &['Ż']), ('Ž', &['ž']), ('ž', &['Ž']), ('ſ', &['S', 's', ]),
+  ('ƀ', &['Ƀ']), ('Ɓ', &['ɓ']), ('Ƃ', &['ƃ']), ('ƃ', &['Ƃ']), ('Ƅ', &[
+  'ƅ']), ('ƅ', &['Ƅ']), ('Ɔ', &['ɔ']), ('Ƈ', &['ƈ']), ('ƈ', &['Ƈ']),
+  ('Ɖ', &['ɖ']), ('Ɗ', &['ɗ']), ('Ƌ', &['ƌ']), ('ƌ', &['Ƌ']), ('Ǝ', &[
+  'ǝ']), ('Ə', &['ə']), ('Ɛ', &['ɛ']), ('Ƒ', &['ƒ']), ('ƒ', &['Ƒ']),
+  ('Ɠ', &['ɠ']), ('Ɣ', &['ɣ']), ('ƕ', &['Ƕ']), ('Ɩ', &['ɩ']), ('Ɨ', &[
+  'ɨ']), ('Ƙ', &['ƙ']), ('ƙ', &['Ƙ']), ('ƚ', &['Ƚ']), ('Ɯ', &['ɯ']),
+  ('Ɲ', &['ɲ']), ('ƞ', &['Ƞ']), ('Ɵ', &['ɵ']), ('Ơ', &['ơ']), ('ơ', &[
+  'Ơ']), ('Ƣ', &['ƣ']), ('ƣ', &['Ƣ']), ('Ƥ', &['ƥ']), ('ƥ', &['Ƥ']),
+  ('Ʀ', &['ʀ']), ('Ƨ', &['ƨ']), ('ƨ', &['Ƨ']), ('Ʃ', &['ʃ']), ('Ƭ', &[
+  'ƭ']), ('ƭ', &['Ƭ']), ('Ʈ', &['ʈ']), ('Ư', &['ư']), ('ư', &['Ư']),
+  ('Ʊ', &['ʊ']), ('Ʋ', &['ʋ']), ('Ƴ', &['ƴ']), ('ƴ', &['Ƴ']), ('Ƶ', &[
+  'ƶ']), ('ƶ', &['Ƶ']), ('Ʒ', &['ʒ']), ('Ƹ', &['ƹ']), ('ƹ', &['Ƹ']),
+  ('Ƽ', &['ƽ']), ('ƽ', &['Ƽ']), ('ƿ', &['Ƿ']), ('Ǆ', &['ǅ', 'ǆ', ]),
+  ('ǅ', &['Ǆ', 'ǆ', ]), ('ǆ', &['Ǆ', 'ǅ', ]), ('Ǉ', &['ǈ', 'ǉ', ]),
+  ('ǈ', &['Ǉ', 'ǉ', ]), ('ǉ', &['Ǉ', 'ǈ', ]), ('Ǌ', &['ǋ', 'ǌ', ]),
+  ('ǋ', &['Ǌ', 'ǌ', ]), ('ǌ', &['Ǌ', 'ǋ', ]), ('Ǎ', &['ǎ']), ('ǎ', &[
+  'Ǎ']), ('Ǐ', &['ǐ']), ('ǐ', &['Ǐ']), ('Ǒ', &['ǒ']), ('ǒ', &['Ǒ']),
+  ('Ǔ', &['ǔ']), ('ǔ', &['Ǔ']), ('Ǖ', &['ǖ']), ('ǖ', &['Ǖ']), ('Ǘ', &[
+  'ǘ']), ('ǘ', &['Ǘ']), ('Ǚ', &['ǚ']), ('ǚ', &['Ǚ']), ('Ǜ', &['ǜ']),
+  ('ǜ', &['Ǜ']), ('ǝ', &['Ǝ']), ('Ǟ', &['ǟ']), ('ǟ', &['Ǟ']), ('Ǡ', &[
+  'ǡ']), ('ǡ', &['Ǡ']), ('Ǣ', &['ǣ']), ('ǣ', &['Ǣ']), ('Ǥ', &['ǥ']),
+  ('ǥ', &['Ǥ']), ('Ǧ', &['ǧ']), ('ǧ', &['Ǧ']), ('Ǩ', &['ǩ']), ('ǩ', &[
+  'Ǩ']), ('Ǫ', &['ǫ']), ('ǫ', &['Ǫ']), ('Ǭ', &['ǭ']), ('ǭ', &['Ǭ']),
+  ('Ǯ', &['ǯ']), ('ǯ', &['Ǯ']), ('Ǳ', &['ǲ', 'ǳ', ]), ('ǲ', &['Ǳ',
+  'ǳ', ]), ('ǳ', &['Ǳ', 'ǲ', ]), ('Ǵ', &['ǵ']), ('ǵ', &['Ǵ']),
+  ('Ƕ', &['ƕ']), ('Ƿ', &['ƿ']), ('Ǹ', &['ǹ']), ('ǹ', &['Ǹ']), ('Ǻ', &[
+  'ǻ']), ('ǻ', &['Ǻ']), ('Ǽ', &['ǽ']), ('ǽ', &['Ǽ']), ('Ǿ', &['ǿ']),
+  ('ǿ', &['Ǿ']), ('Ȁ', &['ȁ']), ('ȁ', &['Ȁ']), ('Ȃ', &['ȃ']), ('ȃ', &[
+  'Ȃ']), ('Ȅ', &['ȅ']), ('ȅ', &['Ȅ']), ('Ȇ', &['ȇ']), ('ȇ', &['Ȇ']),
+  ('Ȉ', &['ȉ']), ('ȉ', &['Ȉ']), ('Ȋ', &['ȋ']), ('ȋ', &['Ȋ']), ('Ȍ', &[
+  'ȍ']), ('ȍ', &['Ȍ']), ('Ȏ', &['ȏ']), ('ȏ', &['Ȏ']), ('Ȑ', &['ȑ']),
+  ('ȑ', &['Ȑ']), ('Ȓ', &['ȓ']), ('ȓ', &['Ȓ']), ('Ȕ', &['ȕ']), ('ȕ', &[
+  'Ȕ']), ('Ȗ', &['ȗ']), ('ȗ', &['Ȗ']), ('Ș', &['ș']), ('ș', &['Ș']),
+  ('Ț', &['ț']), ('ț', &['Ț']), ('Ȝ', &['ȝ']), ('ȝ', &['Ȝ']), ('Ȟ', &[
+  'ȟ']), ('ȟ', &['Ȟ']), ('Ƞ', &['ƞ']), ('Ȣ', &['ȣ']), ('ȣ', &['Ȣ']),
+  ('Ȥ', &['ȥ']), ('ȥ', &['Ȥ']), ('Ȧ', &['ȧ']), ('ȧ', &['Ȧ']), ('Ȩ', &[
+  'ȩ']), ('ȩ', &['Ȩ']), ('Ȫ', &['ȫ']), ('ȫ', &['Ȫ']), ('Ȭ', &['ȭ']),
+  ('ȭ', &['Ȭ']), ('Ȯ', &['ȯ']), ('ȯ', &['Ȯ']), ('Ȱ', &['ȱ']), ('ȱ', &[
+  'Ȱ']), ('Ȳ', &['ȳ']), ('ȳ', &['Ȳ']), ('Ⱥ', &['ⱥ']), ('Ȼ', &['ȼ']),
+  ('ȼ', &['Ȼ']), ('Ƚ', &['ƚ']), ('Ⱦ', &['ⱦ']), ('ȿ', &['Ȿ']),
+  ('ɀ', &['Ɀ']), ('Ɂ', &['ɂ']), ('ɂ', &['Ɂ']), ('Ƀ', &['ƀ']),
+  ('Ʉ', &['ʉ']), ('Ʌ', &['ʌ']), ('Ɇ', &['ɇ']), ('ɇ', &['Ɇ']), ('Ɉ', &[
+  'ɉ']), ('ɉ', &['Ɉ']), ('Ɋ', &['ɋ']), ('ɋ', &['Ɋ']), ('Ɍ', &['ɍ']),
+  ('ɍ', &['Ɍ']), ('Ɏ', &['ɏ']), ('ɏ', &['Ɏ']), ('ɐ', &['Ɐ']),
+  ('ɑ', &['Ɑ']), ('ɒ', &['Ɒ']), ('ɓ', &['Ɓ']), ('ɔ', &['Ɔ']),
+  ('ɖ', &['Ɖ']), ('ɗ', &['Ɗ']), ('ə', &['Ə']), ('ɛ', &['Ɛ']), ('ɜ', &[
+  'Ɜ']), ('ɠ', &['Ɠ']), ('ɡ', &['Ɡ']), ('ɣ', &['Ɣ']), ('ɥ', &['Ɥ'
+  ]), ('ɦ', &['Ɦ']), ('ɨ', &['Ɨ']), ('ɩ', &['Ɩ']), ('ɪ', &['Ɪ']),
+  ('ɫ', &['Ɫ']), ('ɬ', &['Ɬ']), ('ɯ', &['Ɯ']), ('ɱ', &['Ɱ']),
+  ('ɲ', &['Ɲ']), ('ɵ', &['Ɵ']), ('ɽ', &['Ɽ']), ('ʀ', &['Ʀ']),
+  ('ʃ', &['Ʃ']), ('ʇ', &['Ʇ']), ('ʈ', &['Ʈ']), ('ʉ', &['Ʉ']),
+  ('ʊ', &['Ʊ']), ('ʋ', &['Ʋ']), ('ʌ', &['Ʌ']), ('ʒ', &['Ʒ']), ('ʝ', &[
+  'Ʝ']), ('ʞ', &['Ʞ']), ('ͅ', &['Ι', 'ι', 'ι', ]), ('Ͱ', &['ͱ']),
+  ('ͱ', &['Ͱ']), ('Ͳ', &['ͳ']), ('ͳ', &['Ͳ']), ('Ͷ', &['ͷ']), ('ͷ', &[
+  'Ͷ']), ('ͻ', &['Ͻ']), ('ͼ', &['Ͼ']), ('ͽ', &['Ͽ']), ('Ϳ', &['ϳ']),
+  ('Ά', &['ά']), ('Έ', &['έ']), ('Ή', &['ή']), ('Ί', &['ί']), ('Ό', &[
+  'ό']), ('Ύ', &['ύ']), ('Ώ', &['ώ']), ('Α', &['α']), ('Β', &['β',
+  'ϐ', ]), ('Γ', &['γ']), ('Δ', &['δ']), ('Ε', &['ε', 'ϵ', ]),
+  ('Ζ', &['ζ']), ('Η', &['η']), ('Θ', &['θ', 'ϑ', 'ϴ', ]), ('Ι', &[
+  'ͅ', 'ι', 'ι', ]), ('Κ', &['κ', 'ϰ', ]), ('Λ', &['λ']), ('Μ', &[
+  'µ', 'μ', ]), ('Ν', &['ν']), ('Ξ', &['ξ']), ('Ο', &['ο']), ('Π', &[
+  'π', 'ϖ', ]), ('Ρ', &['ρ', 'ϱ', ]), ('Σ', &['ς', 'σ', ]), ('Τ', &[
+  'τ']), ('Υ', &['υ']), ('Φ', &['φ', 'ϕ', ]), ('Χ', &['χ']), ('Ψ', &[
+  'ψ']), ('Ω', &['ω', 'Ω', ]), ('Ϊ', &['ϊ']), ('Ϋ', &['ϋ']), ('ά', &[
+  'Ά']), ('έ', &['Έ']), ('ή', &['Ή']), ('ί', &['Ί']), ('α', &['Α']),
+  ('β', &['Β', 'ϐ', ]), ('γ', &['Γ']), ('δ', &['Δ']), ('ε', &['Ε',
+  'ϵ', ]), ('ζ', &['Ζ']), ('η', &['Η']), ('θ', &['Θ', 'ϑ', 'ϴ', ]),
+  ('ι', &['ͅ', 'Ι', 'ι', ]), ('κ', &['Κ', 'ϰ', ]), ('λ', &['Λ']),
+  ('μ', &['µ', 'Μ', ]), ('ν', &['Ν']), ('ξ', &['Ξ']), ('ο', &['Ο']),
+  ('π', &['Π', 'ϖ', ]), ('ρ', &['Ρ', 'ϱ', ]), ('ς', &['Σ', 'σ', ]),
+  ('σ', &['Σ', 'ς', ]), ('τ', &['Τ']), ('υ', &['Υ']), ('φ', &['Φ',
+  'ϕ', ]), ('χ', &['Χ']), ('ψ', &['Ψ']), ('ω', &['Ω', 'Ω', ]),
+  ('ϊ', &['Ϊ']), ('ϋ', &['Ϋ']), ('ό', &['Ό']), ('ύ', &['Ύ']), ('ώ', &[
+  'Ώ']), ('Ϗ', &['ϗ']), ('ϐ', &['Β', 'β', ]), ('ϑ', &['Θ', 'θ', 'ϴ',
+  ]), ('ϕ', &['Φ', 'φ', ]), ('ϖ', &['Π', 'π', ]), ('ϗ', &['Ϗ']),
+  ('Ϙ', &['ϙ']), ('ϙ', &['Ϙ']), ('Ϛ', &['ϛ']), ('ϛ', &['Ϛ']), ('Ϝ', &[
+  'ϝ']), ('ϝ', &['Ϝ']), ('Ϟ', &['ϟ']), ('ϟ', &['Ϟ']), ('Ϡ', &['ϡ']),
+  ('ϡ', &['Ϡ']), ('Ϣ', &['ϣ']), ('ϣ', &['Ϣ']), ('Ϥ', &['ϥ']), ('ϥ', &[
+  'Ϥ']), ('Ϧ', &['ϧ']), ('ϧ', &['Ϧ']), ('Ϩ', &['ϩ']), ('ϩ', &['Ϩ']),
+  ('Ϫ', &['ϫ']), ('ϫ', &['Ϫ']), ('Ϭ', &['ϭ']), ('ϭ', &['Ϭ']), ('Ϯ', &[
+  'ϯ']), ('ϯ', &['Ϯ']), ('ϰ', &['Κ', 'κ', ]), ('ϱ', &['Ρ', 'ρ', ]),
+  ('ϲ', &['Ϲ']), ('ϳ', &['Ϳ']), ('ϴ', &['Θ', 'θ', 'ϑ', ]), ('ϵ', &[
+  'Ε', 'ε', ]), ('Ϸ', &['ϸ']), ('ϸ', &['Ϸ']), ('Ϲ', &['ϲ']), ('Ϻ', &[
+  'ϻ']), ('ϻ', &['Ϻ']), ('Ͻ', &['ͻ']), ('Ͼ', &['ͼ']), ('Ͽ', &['ͽ']),
+  ('Ѐ', &['ѐ']), ('Ё', &['ё']), ('Ђ', &['ђ']), ('Ѓ', &['ѓ']), ('Є', &[
+  'є']), ('Ѕ', &['ѕ']), ('І', &['і']), ('Ї', &['ї']), ('Ј', &['ј']),
+  ('Љ', &['љ']), ('Њ', &['њ']), ('Ћ', &['ћ']), ('Ќ', &['ќ']), ('Ѝ', &[
+  'ѝ']), ('Ў', &['ў']), ('Џ', &['џ']), ('А', &['а']), ('Б', &['б']),
+  ('В', &['в', 'ᲀ', ]), ('Г', &['г']), ('Д', &['д', 'ᲁ', ]),
+  ('Е', &['е']), ('Ж', &['ж']), ('З', &['з']), ('И', &['и']), ('Й', &[
+  'й']), ('К', &['к']), ('Л', &['л']), ('М', &['м']), ('Н', &['н']),
+  ('О', &['о', 'ᲂ', ]), ('П', &['п']), ('Р', &['р']), ('С', &['с',
+  'ᲃ', ]), ('Т', &['т', 'ᲄ', 'ᲅ', ]), ('У', &['у']), ('Ф', &['ф'
+  ]), ('Х', &['х']), ('Ц', &['ц']), ('Ч', &['ч']), ('Ш', &['ш']),
+  ('Щ', &['щ']), ('Ъ', &['ъ', 'ᲆ', ]), ('Ы', &['ы']), ('Ь', &['ь']),
+  ('Э', &['э']), ('Ю', &['ю']), ('Я', &['я']), ('а', &['А']), ('б', &[
+  'Б']), ('в', &['В', 'ᲀ', ]), ('г', &['Г']), ('д', &['Д', 'ᲁ', ]),
+  ('е', &['Е']), ('ж', &['Ж']), ('з', &['З']), ('и', &['И']), ('й', &[
+  'Й']), ('к', &['К']), ('л', &['Л']), ('м', &['М']), ('н', &['Н']),
+  ('о', &['О', 'ᲂ', ]), ('п', &['П']), ('р', &['Р']), ('с', &['С',
+  'ᲃ', ]), ('т', &['Т', 'ᲄ', 'ᲅ', ]), ('у', &['У']), ('ф', &['Ф'
+  ]), ('х', &['Х']), ('ц', &['Ц']), ('ч', &['Ч']), ('ш', &['Ш']),
+  ('щ', &['Щ']), ('ъ', &['Ъ', 'ᲆ', ]), ('ы', &['Ы']), ('ь', &['Ь']),
+  ('э', &['Э']), ('ю', &['Ю']), ('я', &['Я']), ('ѐ', &['Ѐ']), ('ё', &[
+  'Ё']), ('ђ', &['Ђ']), ('ѓ', &['Ѓ']), ('є', &['Є']), ('ѕ', &['Ѕ']),
+  ('і', &['І']), ('ї', &['Ї']), ('ј', &['Ј']), ('љ', &['Љ']), ('њ', &[
+  'Њ']), ('ћ', &['Ћ']), ('ќ', &['Ќ']), ('ѝ', &['Ѝ']), ('ў', &['Ў']),
+  ('џ', &['Џ']), ('Ѡ', &['ѡ']), ('ѡ', &['Ѡ']), ('Ѣ', &['ѣ', 'ᲇ', ]),
+  ('ѣ', &['Ѣ', 'ᲇ', ]), ('Ѥ', &['ѥ']), ('ѥ', &['Ѥ']), ('Ѧ', &['ѧ']),
+  ('ѧ', &['Ѧ']), ('Ѩ', &['ѩ']), ('ѩ', &['Ѩ']), ('Ѫ', &['ѫ']), ('ѫ', &[
+  'Ѫ']), ('Ѭ', &['ѭ']), ('ѭ', &['Ѭ']), ('Ѯ', &['ѯ']), ('ѯ', &['Ѯ']),
+  ('Ѱ', &['ѱ']), ('ѱ', &['Ѱ']), ('Ѳ', &['ѳ']), ('ѳ', &['Ѳ']), ('Ѵ', &[
+  'ѵ']), ('ѵ', &['Ѵ']), ('Ѷ', &['ѷ']), ('ѷ', &['Ѷ']), ('Ѹ', &['ѹ']),
+  ('ѹ', &['Ѹ']), ('Ѻ', &['ѻ']), ('ѻ', &['Ѻ']), ('Ѽ', &['ѽ']), ('ѽ', &[
+  'Ѽ']), ('Ѿ', &['ѿ']), ('ѿ', &['Ѿ']), ('Ҁ', &['ҁ']), ('ҁ', &['Ҁ']),
+  ('Ҋ', &['ҋ']), ('ҋ', &['Ҋ']), ('Ҍ', &['ҍ']), ('ҍ', &['Ҍ']), ('Ҏ', &[
+  'ҏ']), ('ҏ', &['Ҏ']), ('Ґ', &['ґ']), ('ґ', &['Ґ']), ('Ғ', &['ғ']),
+  ('ғ', &['Ғ']), ('Ҕ', &['ҕ']), ('ҕ', &['Ҕ']), ('Җ', &['җ']), ('җ', &[
+  'Җ']), ('Ҙ', &['ҙ']), ('ҙ', &['Ҙ']), ('Қ', &['қ']), ('қ', &['Қ']),
+  ('Ҝ', &['ҝ']), ('ҝ', &['Ҝ']), ('Ҟ', &['ҟ']), ('ҟ', &['Ҟ']), ('Ҡ', &[
+  'ҡ']), ('ҡ', &['Ҡ']), ('Ң', &['ң']), ('ң', &['Ң']), ('Ҥ', &['ҥ']),
+  ('ҥ', &['Ҥ']), ('Ҧ', &['ҧ']), ('ҧ', &['Ҧ']), ('Ҩ', &['ҩ']), ('ҩ', &[
+  'Ҩ']), ('Ҫ', &['ҫ']), ('ҫ', &['Ҫ']), ('Ҭ', &['ҭ']), ('ҭ', &['Ҭ']),
+  ('Ү', &['ү']), ('ү', &['Ү']), ('Ұ', &['ұ']), ('ұ', &['Ұ']), ('Ҳ', &[
+  'ҳ']), ('ҳ', &['Ҳ']), ('Ҵ', &['ҵ']), ('ҵ', &['Ҵ']), ('Ҷ', &['ҷ']),
+  ('ҷ', &['Ҷ']), ('Ҹ', &['ҹ']), ('ҹ', &['Ҹ']), ('Һ', &['һ']), ('һ', &[
+  'Һ']), ('Ҽ', &['ҽ']), ('ҽ', &['Ҽ']), ('Ҿ', &['ҿ']), ('ҿ', &['Ҿ']),
+  ('Ӏ', &['ӏ']), ('Ӂ', &['ӂ']), ('ӂ', &['Ӂ']), ('Ӄ', &['ӄ']), ('ӄ', &[
+  'Ӄ']), ('Ӆ', &['ӆ']), ('ӆ', &['Ӆ']), ('Ӈ', &['ӈ']), ('ӈ', &['Ӈ']),
+  ('Ӊ', &['ӊ']), ('ӊ', &['Ӊ']), ('Ӌ', &['ӌ']), ('ӌ', &['Ӌ']), ('Ӎ', &[
+  'ӎ']), ('ӎ', &['Ӎ']), ('ӏ', &['Ӏ']), ('Ӑ', &['ӑ']), ('ӑ', &['Ӑ']),
+  ('Ӓ', &['ӓ']), ('ӓ', &['Ӓ']), ('Ӕ', &['ӕ']), ('ӕ', &['Ӕ']), ('Ӗ', &[
+  'ӗ']), ('ӗ', &['Ӗ']), ('Ә', &['ә']), ('ә', &['Ә']), ('Ӛ', &['ӛ']),
+  ('ӛ', &['Ӛ']), ('Ӝ', &['ӝ']), ('ӝ', &['Ӝ']), ('Ӟ', &['ӟ']), ('ӟ', &[
+  'Ӟ']), ('Ӡ', &['ӡ']), ('ӡ', &['Ӡ']), ('Ӣ', &['ӣ']), ('ӣ', &['Ӣ']),
+  ('Ӥ', &['ӥ']), ('ӥ', &['Ӥ']), ('Ӧ', &['ӧ']), ('ӧ', &['Ӧ']), ('Ө', &[
+  'ө']), ('ө', &['Ө']), ('Ӫ', &['ӫ']), ('ӫ', &['Ӫ']), ('Ӭ', &['ӭ']),
+  ('ӭ', &['Ӭ']), ('Ӯ', &['ӯ']), ('ӯ', &['Ӯ']), ('Ӱ', &['ӱ']), ('ӱ', &[
+  'Ӱ']), ('Ӳ', &['ӳ']), ('ӳ', &['Ӳ']), ('Ӵ', &['ӵ']), ('ӵ', &['Ӵ']),
+  ('Ӷ', &['ӷ']), ('ӷ', &['Ӷ']), ('Ӹ', &['ӹ']), ('ӹ', &['Ӹ']), ('Ӻ', &[
+  'ӻ']), ('ӻ', &['Ӻ']), ('Ӽ', &['ӽ']), ('ӽ', &['Ӽ']), ('Ӿ', &['ӿ']),
+  ('ӿ', &['Ӿ']), ('Ԁ', &['ԁ']), ('ԁ', &['Ԁ']), ('Ԃ', &['ԃ']), ('ԃ', &[
+  'Ԃ']), ('Ԅ', &['ԅ']), ('ԅ', &['Ԅ']), ('Ԇ', &['ԇ']), ('ԇ', &['Ԇ']),
+  ('Ԉ', &['ԉ']), ('ԉ', &['Ԉ']), ('Ԋ', &['ԋ']), ('ԋ', &['Ԋ']), ('Ԍ', &[
+  'ԍ']), ('ԍ', &['Ԍ']), ('Ԏ', &['ԏ']), ('ԏ', &['Ԏ']), ('Ԑ', &['ԑ']),
+  ('ԑ', &['Ԑ']), ('Ԓ', &['ԓ']), ('ԓ', &['Ԓ']), ('Ԕ', &['ԕ']), ('ԕ', &[
+  'Ԕ']), ('Ԗ', &['ԗ']), ('ԗ', &['Ԗ']), ('Ԙ', &['ԙ']), ('ԙ', &['Ԙ']),
+  ('Ԛ', &['ԛ']), ('ԛ', &['Ԛ']), ('Ԝ', &['ԝ']), ('ԝ', &['Ԝ']), ('Ԟ', &[
+  'ԟ']), ('ԟ', &['Ԟ']), ('Ԡ', &['ԡ']), ('ԡ', &['Ԡ']), ('Ԣ', &['ԣ']),
+  ('ԣ', &['Ԣ']), ('Ԥ', &['ԥ']), ('ԥ', &['Ԥ']), ('Ԧ', &['ԧ']), ('ԧ', &[
+  'Ԧ']), ('Ԩ', &['ԩ']), ('ԩ', &['Ԩ']), ('Ԫ', &['ԫ']), ('ԫ', &['Ԫ']),
+  ('Ԭ', &['ԭ']), ('ԭ', &['Ԭ']), ('Ԯ', &['ԯ']), ('ԯ', &['Ԯ']), ('Ա', &[
+  'ա']), ('Բ', &['բ']), ('Գ', &['գ']), ('Դ', &['դ']), ('Ե', &['ե']),
+  ('Զ', &['զ']), ('Է', &['է']), ('Ը', &['ը']), ('Թ', &['թ']), ('Ժ', &[
+  'ժ']), ('Ի', &['ի']), ('Լ', &['լ']), ('Խ', &['խ']), ('Ծ', &['ծ']),
+  ('Կ', &['կ']), ('Հ', &['հ']), ('Ձ', &['ձ']), ('Ղ', &['ղ']), ('Ճ', &[
+  'ճ']), ('Մ', &['մ']), ('Յ', &['յ']), ('Ն', &['ն']), ('Շ', &['շ']),
+  ('Ո', &['ո']), ('Չ', &['չ']), ('Պ', &['պ']), ('Ջ', &['ջ']), ('Ռ', &[
+  'ռ']), ('Ս', &['ս']), ('Վ', &['վ']), ('Տ', &['տ']), ('Ր', &['ր']),
+  ('Ց', &['ց']), ('Ւ', &['ւ']), ('Փ', &['փ']), ('Ք', &['ք']), ('Օ', &[
+  'օ']), ('Ֆ', &['ֆ']), ('ա', &['Ա']), ('բ', &['Բ']), ('գ', &['Գ']),
+  ('դ', &['Դ']), ('ե', &['Ե']), ('զ', &['Զ']), ('է', &['Է']), ('ը', &[
+  'Ը']), ('թ', &['Թ']), ('ժ', &['Ժ']), ('ի', &['Ի']), ('լ', &['Լ']),
+  ('խ', &['Խ']), ('ծ', &['Ծ']), ('կ', &['Կ']), ('հ', &['Հ']), ('ձ', &[
+  'Ձ']), ('ղ', &['Ղ']), ('ճ', &['Ճ']), ('մ', &['Մ']), ('յ', &['Յ']),
+  ('ն', &['Ն']), ('շ', &['Շ']), ('ո', &['Ո']), ('չ', &['Չ']), ('պ', &[
+  'Պ']), ('ջ', &['Ջ']), ('ռ', &['Ռ']), ('ս', &['Ս']), ('վ', &['Վ']),
+  ('տ', &['Տ']), ('ր', &['Ր']), ('ց', &['Ց']), ('ւ', &['Ւ']), ('փ', &[
+  'Փ']), ('ք', &['Ք']), ('օ', &['Օ']), ('ֆ', &['Ֆ']), ('Ⴀ', &['ⴀ'
+  ]), ('Ⴁ', &['ⴁ']), ('Ⴂ', &['ⴂ']), ('Ⴃ', &['ⴃ']), ('Ⴄ', &['ⴄ'
+  ]), ('Ⴅ', &['ⴅ']), ('Ⴆ', &['ⴆ']), ('Ⴇ', &['ⴇ']), ('Ⴈ', &['ⴈ'
+  ]), ('Ⴉ', &['ⴉ']), ('Ⴊ', &['ⴊ']), ('Ⴋ', &['ⴋ']), ('Ⴌ', &['ⴌ'
+  ]), ('Ⴍ', &['ⴍ']), ('Ⴎ', &['ⴎ']), ('Ⴏ', &['ⴏ']), ('Ⴐ', &['ⴐ'
+  ]), ('Ⴑ', &['ⴑ']), ('Ⴒ', &['ⴒ']), ('Ⴓ', &['ⴓ']), ('Ⴔ', &['ⴔ'
+  ]), ('Ⴕ', &['ⴕ']), ('Ⴖ', &['ⴖ']), ('Ⴗ', &['ⴗ']), ('Ⴘ', &['ⴘ'
+  ]), ('Ⴙ', &['ⴙ']), ('Ⴚ', &['ⴚ']), ('Ⴛ', &['ⴛ']), ('Ⴜ', &['ⴜ'
+  ]), ('Ⴝ', &['ⴝ']), ('Ⴞ', &['ⴞ']), ('Ⴟ', &['ⴟ']), ('Ⴠ', &['ⴠ'
+  ]), ('Ⴡ', &['ⴡ']), ('Ⴢ', &['ⴢ']), ('Ⴣ', &['ⴣ']), ('Ⴤ', &['ⴤ'
+  ]), ('Ⴥ', &['ⴥ']), ('Ⴧ', &['ⴧ']), ('Ⴭ', &['ⴭ']), ('Ꭰ', &['ꭰ'
+  ]), ('Ꭱ', &['ꭱ']), ('Ꭲ', &['ꭲ']), ('Ꭳ', &['ꭳ']), ('Ꭴ', &['ꭴ'
+  ]), ('Ꭵ', &['ꭵ']), ('Ꭶ', &['ꭶ']), ('Ꭷ', &['ꭷ']), ('Ꭸ', &['ꭸ'
+  ]), ('Ꭹ', &['ꭹ']), ('Ꭺ', &['ꭺ']), ('Ꭻ', &['ꭻ']), ('Ꭼ', &['ꭼ'
+  ]), ('Ꭽ', &['ꭽ']), ('Ꭾ', &['ꭾ']), ('Ꭿ', &['ꭿ']), ('Ꮀ', &['ꮀ'
+  ]), ('Ꮁ', &['ꮁ']), ('Ꮂ', &['ꮂ']), ('Ꮃ', &['ꮃ']), ('Ꮄ', &['ꮄ'
+  ]), ('Ꮅ', &['ꮅ']), ('Ꮆ', &['ꮆ']), ('Ꮇ', &['ꮇ']), ('Ꮈ', &['ꮈ'
+  ]), ('Ꮉ', &['ꮉ']), ('Ꮊ', &['ꮊ']), ('Ꮋ', &['ꮋ']), ('Ꮌ', &['ꮌ'
+  ]), ('Ꮍ', &['ꮍ']), ('Ꮎ', &['ꮎ']), ('Ꮏ', &['ꮏ']), ('Ꮐ', &['ꮐ'
+  ]), ('Ꮑ', &['ꮑ']), ('Ꮒ', &['ꮒ']), ('Ꮓ', &['ꮓ']), ('Ꮔ', &['ꮔ'
+  ]), ('Ꮕ', &['ꮕ']), ('Ꮖ', &['ꮖ']), ('Ꮗ', &['ꮗ']), ('Ꮘ', &['ꮘ'
+  ]), ('Ꮙ', &['ꮙ']), ('Ꮚ', &['ꮚ']), ('Ꮛ', &['ꮛ']), ('Ꮜ', &['ꮜ'
+  ]), ('Ꮝ', &['ꮝ']), ('Ꮞ', &['ꮞ']), ('Ꮟ', &['ꮟ']), ('Ꮠ', &['ꮠ'
+  ]), ('Ꮡ', &['ꮡ']), ('Ꮢ', &['ꮢ']), ('Ꮣ', &['ꮣ']), ('Ꮤ', &['ꮤ'
+  ]), ('Ꮥ', &['ꮥ']), ('Ꮦ', &['ꮦ']), ('Ꮧ', &['ꮧ']), ('Ꮨ', &['ꮨ'
+  ]), ('Ꮩ', &['ꮩ']), ('Ꮪ', &['ꮪ']), ('Ꮫ', &['ꮫ']), ('Ꮬ', &['ꮬ'
+  ]), ('Ꮭ', &['ꮭ']), ('Ꮮ', &['ꮮ']), ('Ꮯ', &['ꮯ']), ('Ꮰ', &['ꮰ'
+  ]), ('Ꮱ', &['ꮱ']), ('Ꮲ', &['ꮲ']), ('Ꮳ', &['ꮳ']), ('Ꮴ', &['ꮴ'
+  ]), ('Ꮵ', &['ꮵ']), ('Ꮶ', &['ꮶ']), ('Ꮷ', &['ꮷ']), ('Ꮸ', &['ꮸ'
+  ]), ('Ꮹ', &['ꮹ']), ('Ꮺ', &['ꮺ']), ('Ꮻ', &['ꮻ']), ('Ꮼ', &['ꮼ'
+  ]), ('Ꮽ', &['ꮽ']), ('Ꮾ', &['ꮾ']), ('Ꮿ', &['ꮿ']), ('Ᏸ', &['ᏸ'
+  ]), ('Ᏹ', &['ᏹ']), ('Ᏺ', &['ᏺ']), ('Ᏻ', &['ᏻ']), ('Ᏼ', &['ᏼ'
+  ]), ('Ᏽ', &['ᏽ']), ('ᏸ', &['Ᏸ']), ('ᏹ', &['Ᏹ']), ('ᏺ', &['Ᏺ'
+  ]), ('ᏻ', &['Ᏻ']), ('ᏼ', &['Ᏼ']), ('ᏽ', &['Ᏽ']), ('ᲀ', &['В',
+  'в', ]), ('ᲁ', &['Д', 'д', ]), ('ᲂ', &['О', 'о', ]), ('ᲃ', &[
+  'С', 'с', ]), ('ᲄ', &['Т', 'т', 'ᲅ', ]), ('ᲅ', &['Т', 'т',
+  'ᲄ', ]), ('ᲆ', &['Ъ', 'ъ', ]), ('ᲇ', &['Ѣ', 'ѣ', ]), ('ᲈ', &[
+  'Ꙋ', 'ꙋ', ]), ('ᵹ', &['Ᵹ']), ('ᵽ', &['Ᵽ']), ('Ḁ', &['ḁ']),
+  ('ḁ', &['Ḁ']), ('Ḃ', &['ḃ']), ('ḃ', &['Ḃ']), ('Ḅ', &['ḅ']),
+  ('ḅ', &['Ḅ']), ('Ḇ', &['ḇ']), ('ḇ', &['Ḇ']), ('Ḉ', &['ḉ']),
+  ('ḉ', &['Ḉ']), ('Ḋ', &['ḋ']), ('ḋ', &['Ḋ']), ('Ḍ', &['ḍ']),
+  ('ḍ', &['Ḍ']), ('Ḏ', &['ḏ']), ('ḏ', &['Ḏ']), ('Ḑ', &['ḑ']),
+  ('ḑ', &['Ḑ']), ('Ḓ', &['ḓ']), ('ḓ', &['Ḓ']), ('Ḕ', &['ḕ']),
+  ('ḕ', &['Ḕ']), ('Ḗ', &['ḗ']), ('ḗ', &['Ḗ']), ('Ḙ', &['ḙ']),
+  ('ḙ', &['Ḙ']), ('Ḛ', &['ḛ']), ('ḛ', &['Ḛ']), ('Ḝ', &['ḝ']),
+  ('ḝ', &['Ḝ']), ('Ḟ', &['ḟ']), ('ḟ', &['Ḟ']), ('Ḡ', &['ḡ']),
+  ('ḡ', &['Ḡ']), ('Ḣ', &['ḣ']), ('ḣ', &['Ḣ']), ('Ḥ', &['ḥ']),
+  ('ḥ', &['Ḥ']), ('Ḧ', &['ḧ']), ('ḧ', &['Ḧ']), ('Ḩ', &['ḩ']),
+  ('ḩ', &['Ḩ']), ('Ḫ', &['ḫ']), ('ḫ', &['Ḫ']), ('Ḭ', &['ḭ']),
+  ('ḭ', &['Ḭ']), ('Ḯ', &['ḯ']), ('ḯ', &['Ḯ']), ('Ḱ', &['ḱ']),
+  ('ḱ', &['Ḱ']), ('Ḳ', &['ḳ']), ('ḳ', &['Ḳ']), ('Ḵ', &['ḵ']),
+  ('ḵ', &['Ḵ']), ('Ḷ', &['ḷ']), ('ḷ', &['Ḷ']), ('Ḹ', &['ḹ']),
+  ('ḹ', &['Ḹ']), ('Ḻ', &['ḻ']), ('ḻ', &['Ḻ']), ('Ḽ', &['ḽ']),
+  ('ḽ', &['Ḽ']), ('Ḿ', &['ḿ']), ('ḿ', &['Ḿ']), ('Ṁ', &['ṁ']),
+  ('ṁ', &['Ṁ']), ('Ṃ', &['ṃ']), ('ṃ', &['Ṃ']), ('Ṅ', &['ṅ']),
+  ('ṅ', &['Ṅ']), ('Ṇ', &['ṇ']), ('ṇ', &['Ṇ']), ('Ṉ', &['ṉ']),
+  ('ṉ', &['Ṉ']), ('Ṋ', &['ṋ']), ('ṋ', &['Ṋ']), ('Ṍ', &['ṍ']),
+  ('ṍ', &['Ṍ']), ('Ṏ', &['ṏ']), ('ṏ', &['Ṏ']), ('Ṑ', &['ṑ']),
+  ('ṑ', &['Ṑ']), ('Ṓ', &['ṓ']), ('ṓ', &['Ṓ']), ('Ṕ', &['ṕ']),
+  ('ṕ', &['Ṕ']), ('Ṗ', &['ṗ']), ('ṗ', &['Ṗ']), ('Ṙ', &['ṙ']),
+  ('ṙ', &['Ṙ']), ('Ṛ', &['ṛ']), ('ṛ', &['Ṛ']), ('Ṝ', &['ṝ']),
+  ('ṝ', &['Ṝ']), ('Ṟ', &['ṟ']), ('ṟ', &['Ṟ']), ('Ṡ', &['ṡ',
+  'ẛ', ]), ('ṡ', &['Ṡ', 'ẛ', ]), ('Ṣ', &['ṣ']), ('ṣ', &['Ṣ']),
+  ('Ṥ', &['ṥ']), ('ṥ', &['Ṥ']), ('Ṧ', &['ṧ']), ('ṧ', &['Ṧ']),
+  ('Ṩ', &['ṩ']), ('ṩ', &['Ṩ']), ('Ṫ', &['ṫ']), ('ṫ', &['Ṫ']),
+  ('Ṭ', &['ṭ']), ('ṭ', &['Ṭ']), ('Ṯ', &['ṯ']), ('ṯ', &['Ṯ']),
+  ('Ṱ', &['ṱ']), ('ṱ', &['Ṱ']), ('Ṳ', &['ṳ']), ('ṳ', &['Ṳ']),
+  ('Ṵ', &['ṵ']), ('ṵ', &['Ṵ']), ('Ṷ', &['ṷ']), ('ṷ', &['Ṷ']),
+  ('Ṹ', &['ṹ']), ('ṹ', &['Ṹ']), ('Ṻ', &['ṻ']), ('ṻ', &['Ṻ']),
+  ('Ṽ', &['ṽ']), ('ṽ', &['Ṽ']), ('Ṿ', &['ṿ']), ('ṿ', &['Ṿ']),
+  ('Ẁ', &['ẁ']), ('ẁ', &['Ẁ']), ('Ẃ', &['ẃ']), ('ẃ', &['Ẃ']),
+  ('Ẅ', &['ẅ']), ('ẅ', &['Ẅ']), ('Ẇ', &['ẇ']), ('ẇ', &['Ẇ']),
+  ('Ẉ', &['ẉ']), ('ẉ', &['Ẉ']), ('Ẋ', &['ẋ']), ('ẋ', &['Ẋ']),
+  ('Ẍ', &['ẍ']), ('ẍ', &['Ẍ']), ('Ẏ', &['ẏ']), ('ẏ', &['Ẏ']),
+  ('Ẑ', &['ẑ']), ('ẑ', &['Ẑ']), ('Ẓ', &['ẓ']), ('ẓ', &['Ẓ']),
+  ('Ẕ', &['ẕ']), ('ẕ', &['Ẕ']), ('ẛ', &['Ṡ', 'ṡ', ]), ('ẞ', &[
+  'ß']), ('Ạ', &['ạ']), ('ạ', &['Ạ']), ('Ả', &['ả']), ('ả', &[
+  'Ả']), ('Ấ', &['ấ']), ('ấ', &['Ấ']), ('Ầ', &['ầ']), ('ầ', &[
+  'Ầ']), ('Ẩ', &['ẩ']), ('ẩ', &['Ẩ']), ('Ẫ', &['ẫ']), ('ẫ', &[
+  'Ẫ']), ('Ậ', &['ậ']), ('ậ', &['Ậ']), ('Ắ', &['ắ']), ('ắ', &[
+  'Ắ']), ('Ằ', &['ằ']), ('ằ', &['Ằ']), ('Ẳ', &['ẳ']), ('ẳ', &[
+  'Ẳ']), ('Ẵ', &['ẵ']), ('ẵ', &['Ẵ']), ('Ặ', &['ặ']), ('ặ', &[
+  'Ặ']), ('Ẹ', &['ẹ']), ('ẹ', &['Ẹ']), ('Ẻ', &['ẻ']), ('ẻ', &[
+  'Ẻ']), ('Ẽ', &['ẽ']), ('ẽ', &['Ẽ']), ('Ế', &['ế']), ('ế', &[
+  'Ế']), ('Ề', &['ề']), ('ề', &['Ề']), ('Ể', &['ể']), ('ể', &[
+  'Ể']), ('Ễ', &['ễ']), ('ễ', &['Ễ']), ('Ệ', &['ệ']), ('ệ', &[
+  'Ệ']), ('Ỉ', &['ỉ']), ('ỉ', &['Ỉ']), ('Ị', &['ị']), ('ị', &[
+  'Ị']), ('Ọ', &['ọ']), ('ọ', &['Ọ']), ('Ỏ', &['ỏ']), ('ỏ', &[
+  'Ỏ']), ('Ố', &['ố']), ('ố', &['Ố']), ('Ồ', &['ồ']), ('ồ', &[
+  'Ồ']), ('Ổ', &['ổ']), ('ổ', &['Ổ']), ('Ỗ', &['ỗ']), ('ỗ', &[
+  'Ỗ']), ('Ộ', &['ộ']), ('ộ', &['Ộ']), ('Ớ', &['ớ']), ('ớ', &[
+  'Ớ']), ('Ờ', &['ờ']), ('ờ', &['Ờ']), ('Ở', &['ở']), ('ở', &[
+  'Ở']), ('Ỡ', &['ỡ']), ('ỡ', &['Ỡ']), ('Ợ', &['ợ']), ('ợ', &[
+  'Ợ']), ('Ụ', &['ụ']), ('ụ', &['Ụ']), ('Ủ', &['ủ']), ('ủ', &[
+  'Ủ']), ('Ứ', &['ứ']), ('ứ', &['Ứ']), ('Ừ', &['ừ']), ('ừ', &[
+  'Ừ']), ('Ử', &['ử']), ('ử', &['Ử']), ('Ữ', &['ữ']), ('ữ', &[
+  'Ữ']), ('Ự', &['ự']), ('ự', &['Ự']), ('Ỳ', &['ỳ']), ('ỳ', &[
+  'Ỳ']), ('Ỵ', &['ỵ']), ('ỵ', &['Ỵ']), ('Ỷ', &['ỷ']), ('ỷ', &[
+  'Ỷ']), ('Ỹ', &['ỹ']), ('ỹ', &['Ỹ']), ('Ỻ', &['ỻ']), ('ỻ', &[
+  'Ỻ']), ('Ỽ', &['ỽ']), ('ỽ', &['Ỽ']), ('Ỿ', &['ỿ']), ('ỿ', &[
+  'Ỿ']), ('ἀ', &['Ἀ']), ('ἁ', &['Ἁ']), ('ἂ', &['Ἂ']), ('ἃ', &[
+  'Ἃ']), ('ἄ', &['Ἄ']), ('ἅ', &['Ἅ']), ('ἆ', &['Ἆ']), ('ἇ', &[
+  'Ἇ']), ('Ἀ', &['ἀ']), ('Ἁ', &['ἁ']), ('Ἂ', &['ἂ']), ('Ἃ', &[
+  'ἃ']), ('Ἄ', &['ἄ']), ('Ἅ', &['ἅ']), ('Ἆ', &['ἆ']), ('Ἇ', &[
+  'ἇ']), ('ἐ', &['Ἐ']), ('ἑ', &['Ἑ']), ('ἒ', &['Ἒ']), ('ἓ', &[
+  'Ἓ']), ('ἔ', &['Ἔ']), ('ἕ', &['Ἕ']), ('Ἐ', &['ἐ']), ('Ἑ', &[
+  'ἑ']), ('Ἒ', &['ἒ']), ('Ἓ', &['ἓ']), ('Ἔ', &['ἔ']), ('Ἕ', &[
+  'ἕ']), ('ἠ', &['Ἠ']), ('ἡ', &['Ἡ']), ('ἢ', &['Ἢ']), ('ἣ', &[
+  'Ἣ']), ('ἤ', &['Ἤ']), ('ἥ', &['Ἥ']), ('ἦ', &['Ἦ']), ('ἧ', &[
+  'Ἧ']), ('Ἠ', &['ἠ']), ('Ἡ', &['ἡ']), ('Ἢ', &['ἢ']), ('Ἣ', &[
+  'ἣ']), ('Ἤ', &['ἤ']), ('Ἥ', &['ἥ']), ('Ἦ', &['ἦ']), ('Ἧ', &[
+  'ἧ']), ('ἰ', &['Ἰ']), ('ἱ', &['Ἱ']), ('ἲ', &['Ἲ']), ('ἳ', &[
+  'Ἳ']), ('ἴ', &['Ἴ']), ('ἵ', &['Ἵ']), ('ἶ', &['Ἶ']), ('ἷ', &[
+  'Ἷ']), ('Ἰ', &['ἰ']), ('Ἱ', &['ἱ']), ('Ἲ', &['ἲ']), ('Ἳ', &[
+  'ἳ']), ('Ἴ', &['ἴ']), ('Ἵ', &['ἵ']), ('Ἶ', &['ἶ']), ('Ἷ', &[
+  'ἷ']), ('ὀ', &['Ὀ']), ('ὁ', &['Ὁ']), ('ὂ', &['Ὂ']), ('ὃ', &[
+  'Ὃ']), ('ὄ', &['Ὄ']), ('ὅ', &['Ὅ']), ('Ὀ', &['ὀ']), ('Ὁ', &[
+  'ὁ']), ('Ὂ', &['ὂ']), ('Ὃ', &['ὃ']), ('Ὄ', &['ὄ']), ('Ὅ', &[
+  'ὅ']), ('ὑ', &['Ὑ']), ('ὓ', &['Ὓ']), ('ὕ', &['Ὕ']), ('ὗ', &[
+  'Ὗ']), ('Ὑ', &['ὑ']), ('Ὓ', &['ὓ']), ('Ὕ', &['ὕ']), ('Ὗ', &[
+  'ὗ']), ('ὠ', &['Ὠ']), ('ὡ', &['Ὡ']), ('ὢ', &['Ὢ']), ('ὣ', &[
+  'Ὣ']), ('ὤ', &['Ὤ']), ('ὥ', &['Ὥ']), ('ὦ', &['Ὦ']), ('ὧ', &[
+  'Ὧ']), ('Ὠ', &['ὠ']), ('Ὡ', &['ὡ']), ('Ὢ', &['ὢ']), ('Ὣ', &[
+  'ὣ']), ('Ὤ', &['ὤ']), ('Ὥ', &['ὥ']), ('Ὦ', &['ὦ']), ('Ὧ', &[
+  'ὧ']), ('ὰ', &['Ὰ']), ('ά', &['Ά']), ('ὲ', &['Ὲ']), ('έ', &[
+  'Έ']), ('ὴ', &['Ὴ']), ('ή', &['Ή']), ('ὶ', &['Ὶ']), ('ί', &[
+  'Ί']), ('ὸ', &['Ὸ']), ('ό', &['Ό']), ('ὺ', &['Ὺ']), ('ύ', &[
+  'Ύ']), ('ὼ', &['Ὼ']), ('ώ', &['Ώ']), ('ᾀ', &['ᾈ']), ('ᾁ', &[
+  'ᾉ']), ('ᾂ', &['ᾊ']), ('ᾃ', &['ᾋ']), ('ᾄ', &['ᾌ']), ('ᾅ', &[
+  'ᾍ']), ('ᾆ', &['ᾎ']), ('ᾇ', &['ᾏ']), ('ᾈ', &['ᾀ']), ('ᾉ', &[
+  'ᾁ']), ('ᾊ', &['ᾂ']), ('ᾋ', &['ᾃ']), ('ᾌ', &['ᾄ']), ('ᾍ', &[
+  'ᾅ']), ('ᾎ', &['ᾆ']), ('ᾏ', &['ᾇ']), ('ᾐ', &['ᾘ']), ('ᾑ', &[
+  'ᾙ']), ('ᾒ', &['ᾚ']), ('ᾓ', &['ᾛ']), ('ᾔ', &['ᾜ']), ('ᾕ', &[
+  'ᾝ']), ('ᾖ', &['ᾞ']), ('ᾗ', &['ᾟ']), ('ᾘ', &['ᾐ']), ('ᾙ', &[
+  'ᾑ']), ('ᾚ', &['ᾒ']), ('ᾛ', &['ᾓ']), ('ᾜ', &['ᾔ']), ('ᾝ', &[
+  'ᾕ']), ('ᾞ', &['ᾖ']), ('ᾟ', &['ᾗ']), ('ᾠ', &['ᾨ']), ('ᾡ', &[
+  'ᾩ']), ('ᾢ', &['ᾪ']), ('ᾣ', &['ᾫ']), ('ᾤ', &['ᾬ']), ('ᾥ', &[
+  'ᾭ']), ('ᾦ', &['ᾮ']), ('ᾧ', &['ᾯ']), ('ᾨ', &['ᾠ']), ('ᾩ', &[
+  'ᾡ']), ('ᾪ', &['ᾢ']), ('ᾫ', &['ᾣ']), ('ᾬ', &['ᾤ']), ('ᾭ', &[
+  'ᾥ']), ('ᾮ', &['ᾦ']), ('ᾯ', &['ᾧ']), ('ᾰ', &['Ᾰ']), ('ᾱ', &[
+  'Ᾱ']), ('ᾳ', &['ᾼ']), ('Ᾰ', &['ᾰ']), ('Ᾱ', &['ᾱ']), ('Ὰ', &[
+  'ὰ']), ('Ά', &['ά']), ('ᾼ', &['ᾳ']), ('ι', &['ͅ', 'Ι', 'ι',
+  ]), ('ῃ', &['ῌ']), ('Ὲ', &['ὲ']), ('Έ', &['έ']), ('Ὴ', &['ὴ'
+  ]), ('Ή', &['ή']), ('ῌ', &['ῃ']), ('ῐ', &['Ῐ']), ('ῑ', &['Ῑ'
+  ]), ('Ῐ', &['ῐ']), ('Ῑ', &['ῑ']), ('Ὶ', &['ὶ']), ('Ί', &['ί'
+  ]), ('ῠ', &['Ῠ']), ('ῡ', &['Ῡ']), ('ῥ', &['Ῥ']), ('Ῠ', &['ῠ'
+  ]), ('Ῡ', &['ῡ']), ('Ὺ', &['ὺ']), ('Ύ', &['ύ']), ('Ῥ', &['ῥ'
+  ]), ('ῳ', &['ῼ']), ('Ὸ', &['ὸ']), ('Ό', &['ό']), ('Ὼ', &['ὼ'
+  ]), ('Ώ', &['ώ']), ('ῼ', &['ῳ']), ('Ω', &['Ω', 'ω', ]),
+  ('K', &['K', 'k', ]), ('Å', &['Å', 'å', ]), ('Ⅎ', &['ⅎ']),
+  ('ⅎ', &['Ⅎ']), ('Ⅰ', &['ⅰ']), ('Ⅱ', &['ⅱ']), ('Ⅲ', &['ⅲ']),
+  ('Ⅳ', &['ⅳ']), ('Ⅴ', &['ⅴ']), ('Ⅵ', &['ⅵ']), ('Ⅶ', &['ⅶ']),
+  ('Ⅷ', &['ⅷ']), ('Ⅸ', &['ⅸ']), ('Ⅹ', &['ⅹ']), ('Ⅺ', &['ⅺ']),
+  ('Ⅻ', &['ⅻ']), ('Ⅼ', &['ⅼ']), ('Ⅽ', &['ⅽ']), ('Ⅾ', &['ⅾ']),
+  ('Ⅿ', &['ⅿ']), ('ⅰ', &['Ⅰ']), ('ⅱ', &['Ⅱ']), ('ⅲ', &['Ⅲ']),
+  ('ⅳ', &['Ⅳ']), ('ⅴ', &['Ⅴ']), ('ⅵ', &['Ⅵ']), ('ⅶ', &['Ⅶ']),
+  ('ⅷ', &['Ⅷ']), ('ⅸ', &['Ⅸ']), ('ⅹ', &['Ⅹ']), ('ⅺ', &['Ⅺ']),
+  ('ⅻ', &['Ⅻ']), ('ⅼ', &['Ⅼ']), ('ⅽ', &['Ⅽ']), ('ⅾ', &['Ⅾ']),
+  ('ⅿ', &['Ⅿ']), ('Ↄ', &['ↄ']), ('ↄ', &['Ↄ']), ('Ⓐ', &['ⓐ']),
+  ('Ⓑ', &['ⓑ']), ('Ⓒ', &['ⓒ']), ('Ⓓ', &['ⓓ']), ('Ⓔ', &['ⓔ']),
+  ('Ⓕ', &['ⓕ']), ('Ⓖ', &['ⓖ']), ('Ⓗ', &['ⓗ']), ('Ⓘ', &['ⓘ']),
+  ('Ⓙ', &['ⓙ']), ('Ⓚ', &['ⓚ']), ('Ⓛ', &['ⓛ']), ('Ⓜ', &['ⓜ']),
+  ('Ⓝ', &['ⓝ']), ('Ⓞ', &['ⓞ']), ('Ⓟ', &['ⓟ']), ('Ⓠ', &['ⓠ']),
+  ('Ⓡ', &['ⓡ']), ('Ⓢ', &['ⓢ']), ('Ⓣ', &['ⓣ']), ('Ⓤ', &['ⓤ']),
+  ('Ⓥ', &['ⓥ']), ('Ⓦ', &['ⓦ']), ('Ⓧ', &['ⓧ']), ('Ⓨ', &['ⓨ']),
+  ('Ⓩ', &['ⓩ']), ('ⓐ', &['Ⓐ']), ('ⓑ', &['Ⓑ']), ('ⓒ', &['Ⓒ']),
+  ('ⓓ', &['Ⓓ']), ('ⓔ', &['Ⓔ']), ('ⓕ', &['Ⓕ']), ('ⓖ', &['Ⓖ']),
+  ('ⓗ', &['Ⓗ']), ('ⓘ', &['Ⓘ']), ('ⓙ', &['Ⓙ']), ('ⓚ', &['Ⓚ']),
+  ('ⓛ', &['Ⓛ']), ('ⓜ', &['Ⓜ']), ('ⓝ', &['Ⓝ']), ('ⓞ', &['Ⓞ']),
+  ('ⓟ', &['Ⓟ']), ('ⓠ', &['Ⓠ']), ('ⓡ', &['Ⓡ']), ('ⓢ', &['Ⓢ']),
+  ('ⓣ', &['Ⓣ']), ('ⓤ', &['Ⓤ']), ('ⓥ', &['Ⓥ']), ('ⓦ', &['Ⓦ']),
+  ('ⓧ', &['Ⓧ']), ('ⓨ', &['Ⓨ']), ('ⓩ', &['Ⓩ']), ('Ⰰ', &['ⰰ']),
+  ('Ⰱ', &['ⰱ']), ('Ⰲ', &['ⰲ']), ('Ⰳ', &['ⰳ']), ('Ⰴ', &['ⰴ']),
+  ('Ⰵ', &['ⰵ']), ('Ⰶ', &['ⰶ']), ('Ⰷ', &['ⰷ']), ('Ⰸ', &['ⰸ']),
+  ('Ⰹ', &['ⰹ']), ('Ⰺ', &['ⰺ']), ('Ⰻ', &['ⰻ']), ('Ⰼ', &['ⰼ']),
+  ('Ⰽ', &['ⰽ']), ('Ⰾ', &['ⰾ']), ('Ⰿ', &['ⰿ']), ('Ⱀ', &['ⱀ']),
+  ('Ⱁ', &['ⱁ']), ('Ⱂ', &['ⱂ']), ('Ⱃ', &['ⱃ']), ('Ⱄ', &['ⱄ']),
+  ('Ⱅ', &['ⱅ']), ('Ⱆ', &['ⱆ']), ('Ⱇ', &['ⱇ']), ('Ⱈ', &['ⱈ']),
+  ('Ⱉ', &['ⱉ']), ('Ⱊ', &['ⱊ']), ('Ⱋ', &['ⱋ']), ('Ⱌ', &['ⱌ']),
+  ('Ⱍ', &['ⱍ']), ('Ⱎ', &['ⱎ']), ('Ⱏ', &['ⱏ']), ('Ⱐ', &['ⱐ']),
+  ('Ⱑ', &['ⱑ']), ('Ⱒ', &['ⱒ']), ('Ⱓ', &['ⱓ']), ('Ⱔ', &['ⱔ']),
+  ('Ⱕ', &['ⱕ']), ('Ⱖ', &['ⱖ']), ('Ⱗ', &['ⱗ']), ('Ⱘ', &['ⱘ']),
+  ('Ⱙ', &['ⱙ']), ('Ⱚ', &['ⱚ']), ('Ⱛ', &['ⱛ']), ('Ⱜ', &['ⱜ']),
+  ('Ⱝ', &['ⱝ']), ('Ⱞ', &['ⱞ']), ('ⰰ', &['Ⰰ']), ('ⰱ', &['Ⰱ']),
+  ('ⰲ', &['Ⰲ']), ('ⰳ', &['Ⰳ']), ('ⰴ', &['Ⰴ']), ('ⰵ', &['Ⰵ']),
+  ('ⰶ', &['Ⰶ']), ('ⰷ', &['Ⰷ']), ('ⰸ', &['Ⰸ']), ('ⰹ', &['Ⰹ']),
+  ('ⰺ', &['Ⰺ']), ('ⰻ', &['Ⰻ']), ('ⰼ', &['Ⰼ']), ('ⰽ', &['Ⰽ']),
+  ('ⰾ', &['Ⰾ']), ('ⰿ', &['Ⰿ']), ('ⱀ', &['Ⱀ']), ('ⱁ', &['Ⱁ']),
+  ('ⱂ', &['Ⱂ']), ('ⱃ', &['Ⱃ']), ('ⱄ', &['Ⱄ']), ('ⱅ', &['Ⱅ']),
+  ('ⱆ', &['Ⱆ']), ('ⱇ', &['Ⱇ']), ('ⱈ', &['Ⱈ']), ('ⱉ', &['Ⱉ']),
+  ('ⱊ', &['Ⱊ']), ('ⱋ', &['Ⱋ']), ('ⱌ', &['Ⱌ']), ('ⱍ', &['Ⱍ']),
+  ('ⱎ', &['Ⱎ']), ('ⱏ', &['Ⱏ']), ('ⱐ', &['Ⱐ']), ('ⱑ', &['Ⱑ']),
+  ('ⱒ', &['Ⱒ']), ('ⱓ', &['Ⱓ']), ('ⱔ', &['Ⱔ']), ('ⱕ', &['Ⱕ']),
+  ('ⱖ', &['Ⱖ']), ('ⱗ', &['Ⱗ']), ('ⱘ', &['Ⱘ']), ('ⱙ', &['Ⱙ']),
+  ('ⱚ', &['Ⱚ']), ('ⱛ', &['Ⱛ']), ('ⱜ', &['Ⱜ']), ('ⱝ', &['Ⱝ']),
+  ('ⱞ', &['Ⱞ']), ('Ⱡ', &['ⱡ']), ('ⱡ', &['Ⱡ']), ('Ɫ', &['ɫ']),
+  ('Ᵽ', &['ᵽ']), ('Ɽ', &['ɽ']), ('ⱥ', &['Ⱥ']), ('ⱦ', &['Ⱦ']),
+  ('Ⱨ', &['ⱨ']), ('ⱨ', &['Ⱨ']), ('Ⱪ', &['ⱪ']), ('ⱪ', &['Ⱪ']),
+  ('Ⱬ', &['ⱬ']), ('ⱬ', &['Ⱬ']), ('Ɑ', &['ɑ']), ('Ɱ', &['ɱ']),
+  ('Ɐ', &['ɐ']), ('Ɒ', &['ɒ']), ('Ⱳ', &['ⱳ']), ('ⱳ', &['Ⱳ']),
+  ('Ⱶ', &['ⱶ']), ('ⱶ', &['Ⱶ']), ('Ȿ', &['ȿ']), ('Ɀ', &['ɀ']),
+  ('Ⲁ', &['ⲁ']), ('ⲁ', &['Ⲁ']), ('Ⲃ', &['ⲃ']), ('ⲃ', &['Ⲃ']),
+  ('Ⲅ', &['ⲅ']), ('ⲅ', &['Ⲅ']), ('Ⲇ', &['ⲇ']), ('ⲇ', &['Ⲇ']),
+  ('Ⲉ', &['ⲉ']), ('ⲉ', &['Ⲉ']), ('Ⲋ', &['ⲋ']), ('ⲋ', &['Ⲋ']),
+  ('Ⲍ', &['ⲍ']), ('ⲍ', &['Ⲍ']), ('Ⲏ', &['ⲏ']), ('ⲏ', &['Ⲏ']),
+  ('Ⲑ', &['ⲑ']), ('ⲑ', &['Ⲑ']), ('Ⲓ', &['ⲓ']), ('ⲓ', &['Ⲓ']),
+  ('Ⲕ', &['ⲕ']), ('ⲕ', &['Ⲕ']), ('Ⲗ', &['ⲗ']), ('ⲗ', &['Ⲗ']),
+  ('Ⲙ', &['ⲙ']), ('ⲙ', &['Ⲙ']), ('Ⲛ', &['ⲛ']), ('ⲛ', &['Ⲛ']),
+  ('Ⲝ', &['ⲝ']), ('ⲝ', &['Ⲝ']), ('Ⲟ', &['ⲟ']), ('ⲟ', &['Ⲟ']),
+  ('Ⲡ', &['ⲡ']), ('ⲡ', &['Ⲡ']), ('Ⲣ', &['ⲣ']), ('ⲣ', &['Ⲣ']),
+  ('Ⲥ', &['ⲥ']), ('ⲥ', &['Ⲥ']), ('Ⲧ', &['ⲧ']), ('ⲧ', &['Ⲧ']),
+  ('Ⲩ', &['ⲩ']), ('ⲩ', &['Ⲩ']), ('Ⲫ', &['ⲫ']), ('ⲫ', &['Ⲫ']),
+  ('Ⲭ', &['ⲭ']), ('ⲭ', &['Ⲭ']), ('Ⲯ', &['ⲯ']), ('ⲯ', &['Ⲯ']),
+  ('Ⲱ', &['ⲱ']), ('ⲱ', &['Ⲱ']), ('Ⲳ', &['ⲳ']), ('ⲳ', &['Ⲳ']),
+  ('Ⲵ', &['ⲵ']), ('ⲵ', &['Ⲵ']), ('Ⲷ', &['ⲷ']), ('ⲷ', &['Ⲷ']),
+  ('Ⲹ', &['ⲹ']), ('ⲹ', &['Ⲹ']), ('Ⲻ', &['ⲻ']), ('ⲻ', &['Ⲻ']),
+  ('Ⲽ', &['ⲽ']), ('ⲽ', &['Ⲽ']), ('Ⲿ', &['ⲿ']), ('ⲿ', &['Ⲿ']),
+  ('Ⳁ', &['ⳁ']), ('ⳁ', &['Ⳁ']), ('Ⳃ', &['ⳃ']), ('ⳃ', &['Ⳃ']),
+  ('Ⳅ', &['ⳅ']), ('ⳅ', &['Ⳅ']), ('Ⳇ', &['ⳇ']), ('ⳇ', &['Ⳇ']),
+  ('Ⳉ', &['ⳉ']), ('ⳉ', &['Ⳉ']), ('Ⳋ', &['ⳋ']), ('ⳋ', &['Ⳋ']),
+  ('Ⳍ', &['ⳍ']), ('ⳍ', &['Ⳍ']), ('Ⳏ', &['ⳏ']), ('ⳏ', &['Ⳏ']),
+  ('Ⳑ', &['ⳑ']), ('ⳑ', &['Ⳑ']), ('Ⳓ', &['ⳓ']), ('ⳓ', &['Ⳓ']),
+  ('Ⳕ', &['ⳕ']), ('ⳕ', &['Ⳕ']), ('Ⳗ', &['ⳗ']), ('ⳗ', &['Ⳗ']),
+  ('Ⳙ', &['ⳙ']), ('ⳙ', &['Ⳙ']), ('Ⳛ', &['ⳛ']), ('ⳛ', &['Ⳛ']),
+  ('Ⳝ', &['ⳝ']), ('ⳝ', &['Ⳝ']), ('Ⳟ', &['ⳟ']), ('ⳟ', &['Ⳟ']),
+  ('Ⳡ', &['ⳡ']), ('ⳡ', &['Ⳡ']), ('Ⳣ', &['ⳣ']), ('ⳣ', &['Ⳣ']),
+  ('Ⳬ', &['ⳬ']), ('ⳬ', &['Ⳬ']), ('Ⳮ', &['ⳮ']), ('ⳮ', &['Ⳮ']),
+  ('Ⳳ', &['ⳳ']), ('ⳳ', &['Ⳳ']), ('ⴀ', &['Ⴀ']), ('ⴁ', &['Ⴁ']),
+  ('ⴂ', &['Ⴂ']), ('ⴃ', &['Ⴃ']), ('ⴄ', &['Ⴄ']), ('ⴅ', &['Ⴅ']),
+  ('ⴆ', &['Ⴆ']), ('ⴇ', &['Ⴇ']), ('ⴈ', &['Ⴈ']), ('ⴉ', &['Ⴉ']),
+  ('ⴊ', &['Ⴊ']), ('ⴋ', &['Ⴋ']), ('ⴌ', &['Ⴌ']), ('ⴍ', &['Ⴍ']),
+  ('ⴎ', &['Ⴎ']), ('ⴏ', &['Ⴏ']), ('ⴐ', &['Ⴐ']), ('ⴑ', &['Ⴑ']),
+  ('ⴒ', &['Ⴒ']), ('ⴓ', &['Ⴓ']), ('ⴔ', &['Ⴔ']), ('ⴕ', &['Ⴕ']),
+  ('ⴖ', &['Ⴖ']), ('ⴗ', &['Ⴗ']), ('ⴘ', &['Ⴘ']), ('ⴙ', &['Ⴙ']),
+  ('ⴚ', &['Ⴚ']), ('ⴛ', &['Ⴛ']), ('ⴜ', &['Ⴜ']), ('ⴝ', &['Ⴝ']),
+  ('ⴞ', &['Ⴞ']), ('ⴟ', &['Ⴟ']), ('ⴠ', &['Ⴠ']), ('ⴡ', &['Ⴡ']),
+  ('ⴢ', &['Ⴢ']), ('ⴣ', &['Ⴣ']), ('ⴤ', &['Ⴤ']), ('ⴥ', &['Ⴥ']),
+  ('ⴧ', &['Ⴧ']), ('ⴭ', &['Ⴭ']), ('Ꙁ', &['ꙁ']), ('ꙁ', &['Ꙁ']),
+  ('Ꙃ', &['ꙃ']), ('ꙃ', &['Ꙃ']), ('Ꙅ', &['ꙅ']), ('ꙅ', &['Ꙅ']),
+  ('Ꙇ', &['ꙇ']), ('ꙇ', &['Ꙇ']), ('Ꙉ', &['ꙉ']), ('ꙉ', &['Ꙉ']),
+  ('Ꙋ', &['ᲈ', 'ꙋ', ]), ('ꙋ', &['ᲈ', 'Ꙋ', ]), ('Ꙍ', &['ꙍ']),
+  ('ꙍ', &['Ꙍ']), ('Ꙏ', &['ꙏ']), ('ꙏ', &['Ꙏ']), ('Ꙑ', &['ꙑ']),
+  ('ꙑ', &['Ꙑ']), ('Ꙓ', &['ꙓ']), ('ꙓ', &['Ꙓ']), ('Ꙕ', &['ꙕ']),
+  ('ꙕ', &['Ꙕ']), ('Ꙗ', &['ꙗ']), ('ꙗ', &['Ꙗ']), ('Ꙙ', &['ꙙ']),
+  ('ꙙ', &['Ꙙ']), ('Ꙛ', &['ꙛ']), ('ꙛ', &['Ꙛ']), ('Ꙝ', &['ꙝ']),
+  ('ꙝ', &['Ꙝ']), ('Ꙟ', &['ꙟ']), ('ꙟ', &['Ꙟ']), ('Ꙡ', &['ꙡ']),
+  ('ꙡ', &['Ꙡ']), ('Ꙣ', &['ꙣ']), ('ꙣ', &['Ꙣ']), ('Ꙥ', &['ꙥ']),
+  ('ꙥ', &['Ꙥ']), ('Ꙧ', &['ꙧ']), ('ꙧ', &['Ꙧ']), ('Ꙩ', &['ꙩ']),
+  ('ꙩ', &['Ꙩ']), ('Ꙫ', &['ꙫ']), ('ꙫ', &['Ꙫ']), ('Ꙭ', &['ꙭ']),
+  ('ꙭ', &['Ꙭ']), ('Ꚁ', &['ꚁ']), ('ꚁ', &['Ꚁ']), ('Ꚃ', &['ꚃ']),
+  ('ꚃ', &['Ꚃ']), ('Ꚅ', &['ꚅ']), ('ꚅ', &['Ꚅ']), ('Ꚇ', &['ꚇ']),
+  ('ꚇ', &['Ꚇ']), ('Ꚉ', &['ꚉ']), ('ꚉ', &['Ꚉ']), ('Ꚋ', &['ꚋ']),
+  ('ꚋ', &['Ꚋ']), ('Ꚍ', &['ꚍ']), ('ꚍ', &['Ꚍ']), ('Ꚏ', &['ꚏ']),
+  ('ꚏ', &['Ꚏ']), ('Ꚑ', &['ꚑ']), ('ꚑ', &['Ꚑ']), ('Ꚓ', &['ꚓ']),
+  ('ꚓ', &['Ꚓ']), ('Ꚕ', &['ꚕ']), ('ꚕ', &['Ꚕ']), ('Ꚗ', &['ꚗ']),
+  ('ꚗ', &['Ꚗ']), ('Ꚙ', &['ꚙ']), ('ꚙ', &['Ꚙ']), ('Ꚛ', &['ꚛ']),
+  ('ꚛ', &['Ꚛ']), ('Ꜣ', &['ꜣ']), ('ꜣ', &['Ꜣ']), ('Ꜥ', &['ꜥ']),
+  ('ꜥ', &['Ꜥ']), ('Ꜧ', &['ꜧ']), ('ꜧ', &['Ꜧ']), ('Ꜩ', &['ꜩ']),
+  ('ꜩ', &['Ꜩ']), ('Ꜫ', &['ꜫ']), ('ꜫ', &['Ꜫ']), ('Ꜭ', &['ꜭ']),
+  ('ꜭ', &['Ꜭ']), ('Ꜯ', &['ꜯ']), ('ꜯ', &['Ꜯ']), ('Ꜳ', &['ꜳ']),
+  ('ꜳ', &['Ꜳ']), ('Ꜵ', &['ꜵ']), ('ꜵ', &['Ꜵ']), ('Ꜷ', &['ꜷ']),
+  ('ꜷ', &['Ꜷ']), ('Ꜹ', &['ꜹ']), ('ꜹ', &['Ꜹ']), ('Ꜻ', &['ꜻ']),
+  ('ꜻ', &['Ꜻ']), ('Ꜽ', &['ꜽ']), ('ꜽ', &['Ꜽ']), ('Ꜿ', &['ꜿ']),
+  ('ꜿ', &['Ꜿ']), ('Ꝁ', &['ꝁ']), ('ꝁ', &['Ꝁ']), ('Ꝃ', &['ꝃ']),
+  ('ꝃ', &['Ꝃ']), ('Ꝅ', &['ꝅ']), ('ꝅ', &['Ꝅ']), ('Ꝇ', &['ꝇ']),
+  ('ꝇ', &['Ꝇ']), ('Ꝉ', &['ꝉ']), ('ꝉ', &['Ꝉ']), ('Ꝋ', &['ꝋ']),
+  ('ꝋ', &['Ꝋ']), ('Ꝍ', &['ꝍ']), ('ꝍ', &['Ꝍ']), ('Ꝏ', &['ꝏ']),
+  ('ꝏ', &['Ꝏ']), ('Ꝑ', &['ꝑ']), ('ꝑ', &['Ꝑ']), ('Ꝓ', &['ꝓ']),
+  ('ꝓ', &['Ꝓ']), ('Ꝕ', &['ꝕ']), ('ꝕ', &['Ꝕ']), ('Ꝗ', &['ꝗ']),
+  ('ꝗ', &['Ꝗ']), ('Ꝙ', &['ꝙ']), ('ꝙ', &['Ꝙ']), ('Ꝛ', &['ꝛ']),
+  ('ꝛ', &['Ꝛ']), ('Ꝝ', &['ꝝ']), ('ꝝ', &['Ꝝ']), ('Ꝟ', &['ꝟ']),
+  ('ꝟ', &['Ꝟ']), ('Ꝡ', &['ꝡ']), ('ꝡ', &['Ꝡ']), ('Ꝣ', &['ꝣ']),
+  ('ꝣ', &['Ꝣ']), ('Ꝥ', &['ꝥ']), ('ꝥ', &['Ꝥ']), ('Ꝧ', &['ꝧ']),
+  ('ꝧ', &['Ꝧ']), ('Ꝩ', &['ꝩ']), ('ꝩ', &['Ꝩ']), ('Ꝫ', &['ꝫ']),
+  ('ꝫ', &['Ꝫ']), ('Ꝭ', &['ꝭ']), ('ꝭ', &['Ꝭ']), ('Ꝯ', &['ꝯ']),
+  ('ꝯ', &['Ꝯ']), ('Ꝺ', &['ꝺ']), ('ꝺ', &['Ꝺ']), ('Ꝼ', &['ꝼ']),
+  ('ꝼ', &['Ꝼ']), ('Ᵹ', &['ᵹ']), ('Ꝿ', &['ꝿ']), ('ꝿ', &['Ꝿ']),
+  ('Ꞁ', &['ꞁ']), ('ꞁ', &['Ꞁ']), ('Ꞃ', &['ꞃ']), ('ꞃ', &['Ꞃ']),
+  ('Ꞅ', &['ꞅ']), ('ꞅ', &['Ꞅ']), ('Ꞇ', &['ꞇ']), ('ꞇ', &['Ꞇ']),
+  ('Ꞌ', &['ꞌ']), ('ꞌ', &['Ꞌ']), ('Ɥ', &['ɥ']), ('Ꞑ', &['ꞑ']),
+  ('ꞑ', &['Ꞑ']), ('Ꞓ', &['ꞓ']), ('ꞓ', &['Ꞓ']), ('Ꞗ', &['ꞗ']),
+  ('ꞗ', &['Ꞗ']), ('Ꞙ', &['ꞙ']), ('ꞙ', &['Ꞙ']), ('Ꞛ', &['ꞛ']),
+  ('ꞛ', &['Ꞛ']), ('Ꞝ', &['ꞝ']), ('ꞝ', &['Ꞝ']), ('Ꞟ', &['ꞟ']),
+  ('ꞟ', &['Ꞟ']), ('Ꞡ', &['ꞡ']), ('ꞡ', &['Ꞡ']), ('Ꞣ', &['ꞣ']),
+  ('ꞣ', &['Ꞣ']), ('Ꞥ', &['ꞥ']), ('ꞥ', &['Ꞥ']), ('Ꞧ', &['ꞧ']),
+  ('ꞧ', &['Ꞧ']), ('Ꞩ', &['ꞩ']), ('ꞩ', &['Ꞩ']), ('Ɦ', &['ɦ']),
+  ('Ɜ', &['ɜ']), ('Ɡ', &['ɡ']), ('Ɬ', &['ɬ']), ('Ɪ', &['ɪ']),
+  ('Ʞ', &['ʞ']), ('Ʇ', &['ʇ']), ('Ʝ', &['ʝ']), ('Ꭓ', &['ꭓ']),
+  ('Ꞵ', &['ꞵ']), ('ꞵ', &['Ꞵ']), ('Ꞷ', &['ꞷ']), ('ꞷ', &['Ꞷ']),
+  ('ꭓ', &['Ꭓ']), ('ꭰ', &['Ꭰ']), ('ꭱ', &['Ꭱ']), ('ꭲ', &['Ꭲ']),
+  ('ꭳ', &['Ꭳ']), ('ꭴ', &['Ꭴ']), ('ꭵ', &['Ꭵ']), ('ꭶ', &['Ꭶ']),
+  ('ꭷ', &['Ꭷ']), ('ꭸ', &['Ꭸ']), ('ꭹ', &['Ꭹ']), ('ꭺ', &['Ꭺ']),
+  ('ꭻ', &['Ꭻ']), ('ꭼ', &['Ꭼ']), ('ꭽ', &['Ꭽ']), ('ꭾ', &['Ꭾ']),
+  ('ꭿ', &['Ꭿ']), ('ꮀ', &['Ꮀ']), ('ꮁ', &['Ꮁ']), ('ꮂ', &['Ꮂ']),
+  ('ꮃ', &['Ꮃ']), ('ꮄ', &['Ꮄ']), ('ꮅ', &['Ꮅ']), ('ꮆ', &['Ꮆ']),
+  ('ꮇ', &['Ꮇ']), ('ꮈ', &['Ꮈ']), ('ꮉ', &['Ꮉ']), ('ꮊ', &['Ꮊ']),
+  ('ꮋ', &['Ꮋ']), ('ꮌ', &['Ꮌ']), ('ꮍ', &['Ꮍ']), ('ꮎ', &['Ꮎ']),
+  ('ꮏ', &['Ꮏ']), ('ꮐ', &['Ꮐ']), ('ꮑ', &['Ꮑ']), ('ꮒ', &['Ꮒ']),
+  ('ꮓ', &['Ꮓ']), ('ꮔ', &['Ꮔ']), ('ꮕ', &['Ꮕ']), ('ꮖ', &['Ꮖ']),
+  ('ꮗ', &['Ꮗ']), ('ꮘ', &['Ꮘ']), ('ꮙ', &['Ꮙ']), ('ꮚ', &['Ꮚ']),
+  ('ꮛ', &['Ꮛ']), ('ꮜ', &['Ꮜ']), ('ꮝ', &['Ꮝ']), ('ꮞ', &['Ꮞ']),
+  ('ꮟ', &['Ꮟ']), ('ꮠ', &['Ꮠ']), ('ꮡ', &['Ꮡ']), ('ꮢ', &['Ꮢ']),
+  ('ꮣ', &['Ꮣ']), ('ꮤ', &['Ꮤ']), ('ꮥ', &['Ꮥ']), ('ꮦ', &['Ꮦ']),
+  ('ꮧ', &['Ꮧ']), ('ꮨ', &['Ꮨ']), ('ꮩ', &['Ꮩ']), ('ꮪ', &['Ꮪ']),
+  ('ꮫ', &['Ꮫ']), ('ꮬ', &['Ꮬ']), ('ꮭ', &['Ꮭ']), ('ꮮ', &['Ꮮ']),
+  ('ꮯ', &['Ꮯ']), ('ꮰ', &['Ꮰ']), ('ꮱ', &['Ꮱ']), ('ꮲ', &['Ꮲ']),
+  ('ꮳ', &['Ꮳ']), ('ꮴ', &['Ꮴ']), ('ꮵ', &['Ꮵ']), ('ꮶ', &['Ꮶ']),
+  ('ꮷ', &['Ꮷ']), ('ꮸ', &['Ꮸ']), ('ꮹ', &['Ꮹ']), ('ꮺ', &['Ꮺ']),
+  ('ꮻ', &['Ꮻ']), ('ꮼ', &['Ꮼ']), ('ꮽ', &['Ꮽ']), ('ꮾ', &['Ꮾ']),
+  ('ꮿ', &['Ꮿ']), ('Ａ', &['ａ']), ('Ｂ', &['ｂ']), ('Ｃ', &['ｃ']),
+  ('Ｄ', &['ｄ']), ('Ｅ', &['ｅ']), ('Ｆ', &['ｆ']), ('Ｇ', &['ｇ']),
+  ('Ｈ', &['ｈ']), ('Ｉ', &['ｉ']), ('Ｊ', &['ｊ']), ('Ｋ', &['ｋ']),
+  ('Ｌ', &['ｌ']), ('Ｍ', &['ｍ']), ('Ｎ', &['ｎ']), ('Ｏ', &['ｏ']),
+  ('Ｐ', &['ｐ']), ('Ｑ', &['ｑ']), ('Ｒ', &['ｒ']), ('Ｓ', &['ｓ']),
+  ('Ｔ', &['ｔ']), ('Ｕ', &['ｕ']), ('Ｖ', &['ｖ']), ('Ｗ', &['ｗ']),
+  ('Ｘ', &['ｘ']), ('Ｙ', &['ｙ']), ('Ｚ', &['ｚ']), ('ａ', &['Ａ']),
+  ('ｂ', &['Ｂ']), ('ｃ', &['Ｃ']), ('ｄ', &['Ｄ']), ('ｅ', &['Ｅ']),
+  ('ｆ', &['Ｆ']), ('ｇ', &['Ｇ']), ('ｈ', &['Ｈ']), ('ｉ', &['Ｉ']),
+  ('ｊ', &['Ｊ']), ('ｋ', &['Ｋ']), ('ｌ', &['Ｌ']), ('ｍ', &['Ｍ']),
+  ('ｎ', &['Ｎ']), ('ｏ', &['Ｏ']), ('ｐ', &['Ｐ']), ('ｑ', &['Ｑ']),
+  ('ｒ', &['Ｒ']), ('ｓ', &['Ｓ']), ('ｔ', &['Ｔ']), ('ｕ', &['Ｕ']),
+  ('ｖ', &['Ｖ']), ('ｗ', &['Ｗ']), ('ｘ', &['Ｘ']), ('ｙ', &['Ｙ']),
+  ('ｚ', &['Ｚ']), ('𐐀', &['𐐨']), ('𐐁', &['𐐩']), ('𐐂', &[
+  '𐐪']), ('𐐃', &['𐐫']), ('𐐄', &['𐐬']), ('𐐅', &['𐐭']),
+  ('𐐆', &['𐐮']), ('𐐇', &['𐐯']), ('𐐈', &['𐐰']), ('𐐉', &[
+  '𐐱']), ('𐐊', &['𐐲']), ('𐐋', &['𐐳']), ('𐐌', &['𐐴']),
+  ('𐐍', &['𐐵']), ('𐐎', &['𐐶']), ('𐐏', &['𐐷']), ('𐐐', &[
+  '𐐸']), ('𐐑', &['𐐹']), ('𐐒', &['𐐺']), ('𐐓', &['𐐻']),
+  ('𐐔', &['𐐼']), ('𐐕', &['𐐽']), ('𐐖', &['𐐾']), ('𐐗', &[
+  '𐐿']), ('𐐘', &['𐑀']), ('𐐙', &['𐑁']), ('𐐚', &['𐑂']),
+  ('𐐛', &['𐑃']), ('𐐜', &['𐑄']), ('𐐝', &['𐑅']), ('𐐞', &[
+  '𐑆']), ('𐐟', &['𐑇']), ('𐐠', &['𐑈']), ('𐐡', &['𐑉']),
+  ('𐐢', &['𐑊']), ('𐐣', &['𐑋']), ('𐐤', &['𐑌']), ('𐐥', &[
+  '𐑍']), ('𐐦', &['𐑎']), ('𐐧', &['𐑏']), ('𐐨', &['𐐀']),
+  ('𐐩', &['𐐁']), ('𐐪', &['𐐂']), ('𐐫', &['𐐃']), ('𐐬', &[
+  '𐐄']), ('𐐭', &['𐐅']), ('𐐮', &['𐐆']), ('𐐯', &['𐐇']),
+  ('𐐰', &['𐐈']), ('𐐱', &['𐐉']), ('𐐲', &['𐐊']), ('𐐳', &[
+  '𐐋']), ('𐐴', &['𐐌']), ('𐐵', &['𐐍']), ('𐐶', &['𐐎']),
+  ('𐐷', &['𐐏']), ('𐐸', &['𐐐']), ('𐐹', &['𐐑']), ('𐐺', &[
+  '𐐒']), ('𐐻', &['𐐓']), ('𐐼', &['𐐔']), ('𐐽', &['𐐕']),
+  ('𐐾', &['𐐖']), ('𐐿', &['𐐗']), ('𐑀', &['𐐘']), ('𐑁', &[
+  '𐐙']), ('𐑂', &['𐐚']), ('𐑃', &['𐐛']), ('𐑄', &['𐐜']),
+  ('𐑅', &['𐐝']), ('𐑆', &['𐐞']), ('𐑇', &['𐐟']), ('𐑈', &[
+  '𐐠']), ('𐑉', &['𐐡']), ('𐑊', &['𐐢']), ('𐑋', &['𐐣']),
+  ('𐑌', &['𐐤']), ('𐑍', &['𐐥']), ('𐑎', &['𐐦']), ('𐑏', &[
+  '𐐧']), ('𐒰', &['𐓘']), ('𐒱', &['𐓙']), ('𐒲', &['𐓚']),
+  ('𐒳', &['𐓛']), ('𐒴', &['𐓜']), ('𐒵', &['𐓝']), ('𐒶', &[
+  '𐓞']), ('𐒷', &['𐓟']), ('𐒸', &['𐓠']), ('𐒹', &['𐓡']),
+  ('𐒺', &['𐓢']), ('𐒻', &['𐓣']), ('𐒼', &['𐓤']), ('𐒽', &[
+  '𐓥']), ('𐒾', &['𐓦']), ('𐒿', &['𐓧']), ('𐓀', &['𐓨']),
+  ('𐓁', &['𐓩']), ('𐓂', &['𐓪']), ('𐓃', &['𐓫']), ('𐓄', &[
+  '𐓬']), ('𐓅', &['𐓭']), ('𐓆', &['𐓮']), ('𐓇', &['𐓯']),
+  ('𐓈', &['𐓰']), ('𐓉', &['𐓱']), ('𐓊', &['𐓲']), ('𐓋', &[
+  '𐓳']), ('𐓌', &['𐓴']), ('𐓍', &['𐓵']), ('𐓎', &['𐓶']),
+  ('𐓏', &['𐓷']), ('𐓐', &['𐓸']), ('𐓑', &['𐓹']), ('𐓒', &[
+  '𐓺']), ('𐓓', &['𐓻']), ('𐓘', &['𐒰']), ('𐓙', &['𐒱']),
+  ('𐓚', &['𐒲']), ('𐓛', &['𐒳']), ('𐓜', &['𐒴']), ('𐓝', &[
+  '𐒵']), ('𐓞', &['𐒶']), ('𐓟', &['𐒷']), ('𐓠', &['𐒸']),
+  ('𐓡', &['𐒹']), ('𐓢', &['𐒺']), ('𐓣', &['𐒻']), ('𐓤', &[
+  '𐒼']), ('𐓥', &['𐒽']), ('𐓦', &['𐒾']), ('𐓧', &['𐒿']),
+  ('𐓨', &['𐓀']), ('𐓩', &['𐓁']), ('𐓪', &['𐓂']), ('𐓫', &[
+  '𐓃']), ('𐓬', &['𐓄']), ('𐓭', &['𐓅']), ('𐓮', &['𐓆']),
+  ('𐓯', &['𐓇']), ('𐓰', &['𐓈']), ('𐓱', &['𐓉']), ('𐓲', &[
+  '𐓊']), ('𐓳', &['𐓋']), ('𐓴', &['𐓌']), ('𐓵', &['𐓍']),
+  ('𐓶', &['𐓎']), ('𐓷', &['𐓏']), ('𐓸', &['𐓐']), ('𐓹', &[
+  '𐓑']), ('𐓺', &['𐓒']), ('𐓻', &['𐓓']), ('𐲀', &['𐳀']),
+  ('𐲁', &['𐳁']), ('𐲂', &['𐳂']), ('𐲃', &['𐳃']), ('𐲄', &[
+  '𐳄']), ('𐲅', &['𐳅']), ('𐲆', &['𐳆']), ('𐲇', &['𐳇']),
+  ('𐲈', &['𐳈']), ('𐲉', &['𐳉']), ('𐲊', &['𐳊']), ('𐲋', &[
+  '𐳋']), ('𐲌', &['𐳌']), ('𐲍', &['𐳍']), ('𐲎', &['𐳎']),
+  ('𐲏', &['𐳏']), ('𐲐', &['𐳐']), ('𐲑', &['𐳑']), ('𐲒', &[
+  '𐳒']), ('𐲓', &['𐳓']), ('𐲔', &['𐳔']), ('𐲕', &['𐳕']),
+  ('𐲖', &['𐳖']), ('𐲗', &['𐳗']), ('𐲘', &['𐳘']), ('𐲙', &[
+  '𐳙']), ('𐲚', &['𐳚']), ('𐲛', &['𐳛']), ('𐲜', &['𐳜']),
+  ('𐲝', &['𐳝']), ('𐲞', &['𐳞']), ('𐲟', &['𐳟']), ('𐲠', &[
+  '𐳠']), ('𐲡', &['𐳡']), ('𐲢', &['𐳢']), ('𐲣', &['𐳣']),
+  ('𐲤', &['𐳤']), ('𐲥', &['𐳥']), ('𐲦', &['𐳦']), ('𐲧', &[
+  '𐳧']), ('𐲨', &['𐳨']), ('𐲩', &['𐳩']), ('𐲪', &['𐳪']),
+  ('𐲫', &['𐳫']), ('𐲬', &['𐳬']), ('𐲭', &['𐳭']), ('𐲮', &[
+  '𐳮']), ('𐲯', &['𐳯']), ('𐲰', &['𐳰']), ('𐲱', &['𐳱']),
+  ('𐲲', &['𐳲']), ('𐳀', &['𐲀']), ('𐳁', &['𐲁']), ('𐳂', &[
+  '𐲂']), ('𐳃', &['𐲃']), ('𐳄', &['𐲄']), ('𐳅', &['𐲅']),
+  ('𐳆', &['𐲆']), ('𐳇', &['𐲇']), ('𐳈', &['𐲈']), ('𐳉', &[
+  '𐲉']), ('𐳊', &['𐲊']), ('𐳋', &['𐲋']), ('𐳌', &['𐲌']),
+  ('𐳍', &['𐲍']), ('𐳎', &['𐲎']), ('𐳏', &['𐲏']), ('𐳐', &[
+  '𐲐']), ('𐳑', &['𐲑']), ('𐳒', &['𐲒']), ('𐳓', &['𐲓']),
+  ('𐳔', &['𐲔']), ('𐳕', &['𐲕']), ('𐳖', &['𐲖']), ('𐳗', &[
+  '𐲗']), ('𐳘', &['𐲘']), ('𐳙', &['𐲙']), ('𐳚', &['𐲚']),
+  ('𐳛', &['𐲛']), ('𐳜', &['𐲜']), ('𐳝', &['𐲝']), ('𐳞', &[
+  '𐲞']), ('𐳟', &['𐲟']), ('𐳠', &['𐲠']), ('𐳡', &['𐲡']),
+  ('𐳢', &['𐲢']), ('𐳣', &['𐲣']), ('𐳤', &['𐲤']), ('𐳥', &[
+  '𐲥']), ('𐳦', &['𐲦']), ('𐳧', &['𐲧']), ('𐳨', &['𐲨']),
+  ('𐳩', &['𐲩']), ('𐳪', &['𐲪']), ('𐳫', &['𐲫']), ('𐳬', &[
+  '𐲬']), ('𐳭', &['𐲭']), ('𐳮', &['𐲮']), ('𐳯', &['𐲯']),
+  ('𐳰', &['𐲰']), ('𐳱', &['𐲱']), ('𐳲', &['𐲲']), ('𑢠', &[
+  '𑣀']), ('𑢡', &['𑣁']), ('𑢢', &['𑣂']), ('𑢣', &['𑣃']),
+  ('𑢤', &['𑣄']), ('𑢥', &['𑣅']), ('𑢦', &['𑣆']), ('𑢧', &[
+  '𑣇']), ('𑢨', &['𑣈']), ('𑢩', &['𑣉']), ('𑢪', &['𑣊']),
+  ('𑢫', &['𑣋']), ('𑢬', &['𑣌']), ('𑢭', &['𑣍']), ('𑢮', &[
+  '𑣎']), ('𑢯', &['𑣏']), ('𑢰', &['𑣐']), ('𑢱', &['𑣑']),
+  ('𑢲', &['𑣒']), ('𑢳', &['𑣓']), ('𑢴', &['𑣔']), ('𑢵', &[
+  '𑣕']), ('𑢶', &['𑣖']), ('𑢷', &['𑣗']), ('𑢸', &['𑣘']),
+  ('𑢹', &['𑣙']), ('𑢺', &['𑣚']), ('𑢻', &['𑣛']), ('𑢼', &[
+  '𑣜']), ('𑢽', &['𑣝']), ('𑢾', &['𑣞']), ('𑢿', &['𑣟']),
+  ('𑣀', &['𑢠']), ('𑣁', &['𑢡']), ('𑣂', &['𑢢']), ('𑣃', &[
+  '𑢣']), ('𑣄', &['𑢤']), ('𑣅', &['𑢥']), ('𑣆', &['𑢦']),
+  ('𑣇', &['𑢧']), ('𑣈', &['𑢨']), ('𑣉', &['𑢩']), ('𑣊', &[
+  '𑢪']), ('𑣋', &['𑢫']), ('𑣌', &['𑢬']), ('𑣍', &['𑢭']),
+  ('𑣎', &['𑢮']), ('𑣏', &['𑢯']), ('𑣐', &['𑢰']), ('𑣑', &[
+  '𑢱']), ('𑣒', &['𑢲']), ('𑣓', &['𑢳']), ('𑣔', &['𑢴']),
+  ('𑣕', &['𑢵']), ('𑣖', &['𑢶']), ('𑣗', &['𑢷']), ('𑣘', &[
+  '𑢸']), ('𑣙', &['𑢹']), ('𑣚', &['𑢺']), ('𑣛', &['𑢻']),
+  ('𑣜', &['𑢼']), ('𑣝', &['𑢽']), ('𑣞', &['𑢾']), ('𑣟', &[
+  '𑢿']), ('𞤀', &['𞤢']), ('𞤁', &['𞤣']), ('𞤂', &['𞤤']),
+  ('𞤃', &['𞤥']), ('𞤄', &['𞤦']), ('𞤅', &['𞤧']), ('𞤆', &[
+  '𞤨']), ('𞤇', &['𞤩']), ('𞤈', &['𞤪']), ('𞤉', &['𞤫']),
+  ('𞤊', &['𞤬']), ('𞤋', &['𞤭']), ('𞤌', &['𞤮']), ('𞤍', &[
+  '𞤯']), ('𞤎', &['𞤰']), ('𞤏', &['𞤱']), ('𞤐', &['𞤲']),
+  ('𞤑', &['𞤳']), ('𞤒', &['𞤴']), ('𞤓', &['𞤵']), ('𞤔', &[
+  '𞤶']), ('𞤕', &['𞤷']), ('𞤖', &['𞤸']), ('𞤗', &['𞤹']),
+  ('𞤘', &['𞤺']), ('𞤙', &['𞤻']), ('𞤚', &['𞤼']), ('𞤛', &[
+  '𞤽']), ('𞤜', &['𞤾']), ('𞤝', &['𞤿']), ('𞤞', &['𞥀']),
+  ('𞤟', &['𞥁']), ('𞤠', &['𞥂']), ('𞤡', &['𞥃']), ('𞤢', &[
+  '𞤀']), ('𞤣', &['𞤁']), ('𞤤', &['𞤂']), ('𞤥', &['𞤃']),
+  ('𞤦', &['𞤄']), ('𞤧', &['𞤅']), ('𞤨', &['𞤆']), ('𞤩', &[
+  '𞤇']), ('𞤪', &['𞤈']), ('𞤫', &['𞤉']), ('𞤬', &['𞤊']),
+  ('𞤭', &['𞤋']), ('𞤮', &['𞤌']), ('𞤯', &['𞤍']), ('𞤰', &[
+  '𞤎']), ('𞤱', &['𞤏']), ('𞤲', &['𞤐']), ('𞤳', &['𞤑']),
+  ('𞤴', &['𞤒']), ('𞤵', &['𞤓']), ('𞤶', &['𞤔']), ('𞤷', &[
+  '𞤕']), ('𞤸', &['𞤖']), ('𞤹', &['𞤗']), ('𞤺', &['𞤘']),
+  ('𞤻', &['𞤙']), ('𞤼', &['𞤚']), ('𞤽', &['𞤛']), ('𞤾', &[
+  '𞤜']), ('𞤿', &['𞤝']), ('𞥀', &['𞤞']), ('𞥁', &['𞤟']),
+  ('𞥂', &['𞤠']), ('𞥃', &['𞤡']),
+];
diff --git a/regex-syntax/src/unicode_tables/general_category.rs b/regex-syntax/src/unicode_tables/general_category.rs
new file mode 100644
index 0000000000..451a0b27c7
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/general_category.rs
@@ -0,0 +1,1844 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate general-category tmp/ucd-10.0.0/ --chars --exclude surrogate
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Cased_Letter", CASED_LETTER), ("Close_Punctuation", CLOSE_PUNCTUATION),
+  ("Connector_Punctuation", CONNECTOR_PUNCTUATION), ("Control", CONTROL),
+  ("Currency_Symbol", CURRENCY_SYMBOL),
+  ("Dash_Punctuation", DASH_PUNCTUATION), ("Decimal_Number", DECIMAL_NUMBER),
+  ("Enclosing_Mark", ENCLOSING_MARK),
+  ("Final_Punctuation", FINAL_PUNCTUATION), ("Format", FORMAT),
+  ("Initial_Punctuation", INITIAL_PUNCTUATION), ("Letter", LETTER),
+  ("Letter_Number", LETTER_NUMBER), ("Line_Separator", LINE_SEPARATOR),
+  ("Lowercase_Letter", LOWERCASE_LETTER), ("Mark", MARK),
+  ("Math_Symbol", MATH_SYMBOL), ("Modifier_Letter", MODIFIER_LETTER),
+  ("Modifier_Symbol", MODIFIER_SYMBOL), ("Nonspacing_Mark", NONSPACING_MARK),
+  ("Number", NUMBER), ("Open_Punctuation", OPEN_PUNCTUATION),
+  ("Other", OTHER), ("Other_Letter", OTHER_LETTER),
+  ("Other_Number", OTHER_NUMBER), ("Other_Punctuation", OTHER_PUNCTUATION),
+  ("Other_Symbol", OTHER_SYMBOL),
+  ("Paragraph_Separator", PARAGRAPH_SEPARATOR), ("Private_Use", PRIVATE_USE),
+  ("Punctuation", PUNCTUATION), ("Separator", SEPARATOR),
+  ("Space_Separator", SPACE_SEPARATOR), ("Spacing_Mark", SPACING_MARK),
+  ("Symbol", SYMBOL), ("Titlecase_Letter", TITLECASE_LETTER),
+  ("Unassigned", UNASSIGNED), ("Uppercase_Letter", UPPERCASE_LETTER),
+];
+
+pub const CASED_LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ö'),
+  ('ø', 'ƺ'), ('Ƽ', 'ƿ'), ('Ǆ', 'ʓ'), ('ʕ', 'ʯ'), ('Ͱ', 'ͳ'),
+  ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᴫ'),
+  ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶚ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℴ'), ('ℹ', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⱻ'), ('Ȿ', 'ⳤ'),
+  ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'),
+  ('ⴭ', 'ⴭ'), ('Ꙁ', 'ꙭ'), ('Ꚁ', 'ꚛ'), ('Ꜣ', 'ꝯ'),
+  ('ꝱ', 'ꞇ'), ('Ꞌ', 'ꞎ'), ('Ꞑ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭠ', 'ꭥ'), ('ꭰ', 'ꮿ'),
+  ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('Ａ', 'Ｚ'), ('ａ', 'ｚ'),
+  ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑢠', '𑣟'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𞤀', '𞥃'),
+];
+
+pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[
+  (')', ')'), (']', ']'), ('}', '}'), ('༻', '༻'), ('༽', '༽'),
+  ('᚜', '᚜'), ('⁆', '⁆'), ('⁾', '⁾'), ('₎', '₎'),
+  ('⌉', '⌉'), ('⌋', '⌋'), ('〉', '〉'), ('❩', '❩'),
+  ('❫', '❫'), ('❭', '❭'), ('❯', '❯'), ('❱', '❱'),
+  ('❳', '❳'), ('❵', '❵'), ('⟆', '⟆'), ('⟧', '⟧'),
+  ('⟩', '⟩'), ('⟫', '⟫'), ('⟭', '⟭'), ('⟯', '⟯'),
+  ('⦄', '⦄'), ('⦆', '⦆'), ('⦈', '⦈'), ('⦊', '⦊'),
+  ('⦌', '⦌'), ('⦎', '⦎'), ('⦐', '⦐'), ('⦒', '⦒'),
+  ('⦔', '⦔'), ('⦖', '⦖'), ('⦘', '⦘'), ('⧙', '⧙'),
+  ('⧛', '⧛'), ('⧽', '⧽'), ('⸣', '⸣'), ('⸥', '⸥'),
+  ('⸧', '⸧'), ('⸩', '⸩'), ('〉', '〉'), ('》', '》'),
+  ('」', '」'), ('』', '』'), ('】', '】'), ('〕', '〕'),
+  ('〗', '〗'), ('〙', '〙'), ('〛', '〛'), ('〞', '〟'),
+  ('﴾', '﴾'), ('︘', '︘'), ('︶', '︶'), ('︸', '︸'),
+  ('︺', '︺'), ('︼', '︼'), ('︾', '︾'), ('﹀', '﹀'),
+  ('﹂', '﹂'), ('﹄', '﹄'), ('﹈', '﹈'), ('﹚', '﹚'),
+  ('﹜', '﹜'), ('﹞', '﹞'), ('）', '）'), ('］', '］'),
+  ('｝', '｝'), ('｠', '｠'), ('｣', '｣'),
+];
+
+pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[
+  ('_', '_'), ('‿', '⁀'), ('⁔', '⁔'), ('︳', '︴'), ('﹍', '﹏'),
+  ('＿', '＿'),
+];
+
+pub const CONTROL: &'static [(char, char)] = &[
+  ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'),
+];
+
+pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[
+  ('$', '$'), ('¢', '¥'), ('֏', '֏'), ('؋', '؋'), ('৲', '৳'),
+  ('৻', '৻'), ('૱', '૱'), ('௹', '௹'), ('฿', '฿'),
+  ('៛', '៛'), ('₠', '₿'), ('꠸', '꠸'), ('﷼', '﷼'),
+  ('﹩', '﹩'), ('＄', '＄'), ('￠', '￡'), ('￥', '￦'),
+];
+
+pub const DASH_PUNCTUATION: &'static [(char, char)] = &[
+  ('-', '-'), ('֊', '֊'), ('־', '־'), ('᐀', '᐀'), ('᠆', '᠆'),
+  ('‐', '―'), ('⸗', '⸗'), ('⸚', '⸚'), ('⸺', '⸻'),
+  ('⹀', '⹀'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'),
+  ('︱', '︲'), ('﹘', '﹘'), ('﹣', '﹣'), ('－', '－'),
+];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+  ('0', '9'), ('٠', '٩'), ('۰', '۹'), ('߀', '߉'), ('०', '९'),
+  ('০', '৯'), ('੦', '੯'), ('૦', '૯'), ('୦', '୯'),
+  ('௦', '௯'), ('౦', '౯'), ('೦', '೯'), ('൦', '൯'),
+  ('෦', '෯'), ('๐', '๙'), ('໐', '໙'), ('༠', '༩'),
+  ('၀', '၉'), ('႐', '႙'), ('០', '៩'), ('᠐', '᠙'),
+  ('᥆', '᥏'), ('᧐', '᧙'), ('᪀', '᪉'), ('᪐', '᪙'),
+  ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'), ('᱐', '᱙'),
+  ('꘠', '꘩'), ('꣐', '꣙'), ('꤀', '꤉'), ('꧐', '꧙'),
+  ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'), ('０', '９'),
+  ('𐒠', '𐒩'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'),
+  ('𑇐', '𑇙'), ('𑋰', '𑋹'), ('𑑐', '𑑙'), ('𑓐', '𑓙'),
+  ('𑙐', '𑙙'), ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'),
+  ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𖩠', '𖩩'), ('𖭐', '𖭙'),
+  ('𝟎', '𝟿'), ('𞥐', '𞥙'),
+];
+
+pub const ENCLOSING_MARK: &'static [(char, char)] = &[
+  ('҈', '҉'), ('᪾', '᪾'), ('⃝', '⃠'), ('⃢', '⃤'),
+  ('꙰', '꙲'),
+];
+
+pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('»', '»'), ('’', '’'), ('”', '”'), ('›', '›'),
+  ('⸃', '⸃'), ('⸅', '⸅'), ('⸊', '⸊'), ('⸍', '⸍'),
+  ('⸝', '⸝'), ('⸡', '⸡'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+  ('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
+  ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{8e2}', '\u{8e2}'),
+  ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{2064}'),
+  ('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'),
+  ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'),
+  ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'),
+  ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('«', '«'), ('‘', '‘'), ('‛', '“'), ('‟', '‟'),
+  ('‹', '‹'), ('⸂', '⸂'), ('⸄', '⸄'), ('⸉', '⸉'),
+  ('⸌', '⸌'), ('⸜', '⸜'), ('⸠', '⸠'),
+];
+
+pub const LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'),
+  ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'),
+  ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'),
+  ('ᱚ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'),
+  ('ᳵ', 'ᳶ'), ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'),
+  ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'),
+  ('K', 'ℭ'), ('ℯ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'),
+  ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'),
+  ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'),
+  ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'),
+  ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'),
+  ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'),
+  ('ⷘ', 'ⷞ'), ('ⸯ', 'ⸯ'), ('々', '〆'), ('〱', '〵'),
+  ('〻', '〼'), ('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'),
+  ('ー', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'),
+  ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'),
+  ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'),
+  ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', 'ꛥ'), ('ꜗ', 'ꜟ'),
+  ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'),
+  ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'),
+  ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'), ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'),
+  ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'), ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'),
+  ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'),
+  ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'), ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'),
+  ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'), ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'), ('ﬀ', 'ﬆ'),
+  ('ﬓ', 'ﬗ'), ('יִ', 'יִ'), ('ײַ', 'ﬨ'), ('שׁ', 'זּ'),
+  ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
+  ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'),
+  ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('Ａ', 'Ｚ'),
+  ('ａ', 'ｚ'), ('ｦ', 'ﾾ'), ('ￂ', 'ￇ'), ('ￊ', 'ￏ'),
+  ('ￒ', 'ￗ'), ('ￚ', 'ￜ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'),
+  ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'),
+  ('𐂀', '𐃺'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'),
+  ('𐌭', '𐍀'), ('𐍂', '𐍉'), ('𐍐', '𐍵'), ('𐎀', '𐎝'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐐀', '𐒝'), ('𐒰', '𐓓'),
+  ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'),
+  ('𐨀', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀃', '𑀷'),
+  ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅐', '𑅲'),
+  ('𑅶', '𑅶'), ('𑆃', '𑆲'), ('𑇁', '𑇄'), ('𑇚', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), ('𑊀', '𑊆'),
+  ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'),
+  ('𑊰', '𑋞'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑌽'),
+  ('𑍐', '𑍐'), ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'),
+  ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖮'),
+  ('𑗘', '𑗛'), ('𑘀', '𑘯'), ('𑙄', '𑙄'), ('𑚀', '𑚪'),
+  ('𑜀', '𑜙'), ('𑢠', '𑣟'), ('𑣿', '𑣿'), ('𑨀', '𑨀'),
+  ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'), ('𑩜', '𑪃'),
+  ('𑪆', '𑪉'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'),
+  ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𒀀', '𒎙'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'),
+  ('𞤀', '𞥃'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const LETTER_NUMBER: &'static [(char, char)] = &[
+  ('ᛮ', 'ᛰ'), ('Ⅰ', 'ↂ'), ('ↅ', 'ↈ'), ('〇', '〇'),
+  ('〡', '〩'), ('〸', '〺'), ('ꛦ', 'ꛯ'), ('𐅀', '𐅴'),
+  ('𐍁', '𐍁'), ('𐍊', '𐍊'), ('𐏑', '𐏕'), ('𒐀', '𒑮'),
+];
+
+pub const LINE_SEPARATOR: &'static [(char, char)] = &[
+  ('\u{2028}', '\u{2028}'),
+];
+
+pub const LOWERCASE_LETTER: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ĳ', 'ĳ'),
+  ('ĵ', 'ĵ'), ('ķ', 'ĸ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ŉ'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƍ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƪ', 'ƫ'), ('ƭ', 'ƭ'),
+  ('ư', 'ư'), ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƺ'), ('ƽ', 'ƿ'),
+  ('ǆ', 'ǆ'), ('ǉ', 'ǉ'), ('ǌ', 'ǌ'), ('ǎ', 'ǎ'), ('ǐ', 'ǐ'),
+  ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'), ('ǚ', 'ǚ'),
+  ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'), ('ǥ', 'ǥ'),
+  ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'), ('ǯ', 'ǰ'),
+  ('ǳ', 'ǳ'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'), ('ǽ', 'ǽ'),
+  ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'), ('ȇ', 'ȇ'),
+  ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'), ('ȑ', 'ȑ'),
+  ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'), ('ț', 'ț'),
+  ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȡ', 'ȡ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'),
+  ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'),
+  ('ȱ', 'ȱ'), ('ȳ', 'ȹ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'),
+  ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ʓ'),
+  ('ʕ', 'ʯ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϼ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᴫ'), ('ᵫ', 'ᵷ'),
+  ('ᵹ', 'ᶚ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'),
+  ('ḇ', 'ḇ'), ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'),
+  ('ḏ', 'ḏ'), ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'),
+  ('ḗ', 'ḗ'), ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'),
+  ('ḟ', 'ḟ'), ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'),
+  ('ḧ', 'ḧ'), ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'),
+  ('ḯ', 'ḯ'), ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'),
+  ('ḷ', 'ḷ'), ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'),
+  ('ḿ', 'ḿ'), ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'),
+  ('ṇ', 'ṇ'), ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'),
+  ('ṏ', 'ṏ'), ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'),
+  ('ṗ', 'ṗ'), ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'),
+  ('ṟ', 'ṟ'), ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'),
+  ('ṧ', 'ṧ'), ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'),
+  ('ṯ', 'ṯ'), ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'),
+  ('ṷ', 'ṷ'), ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'),
+  ('ṿ', 'ṿ'), ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'),
+  ('ẇ', 'ẇ'), ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'),
+  ('ẏ', 'ẏ'), ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẝ'),
+  ('ẟ', 'ẟ'), ('ạ', 'ạ'), ('ả', 'ả'), ('ấ', 'ấ'),
+  ('ầ', 'ầ'), ('ẩ', 'ẩ'), ('ẫ', 'ẫ'), ('ậ', 'ậ'),
+  ('ắ', 'ắ'), ('ằ', 'ằ'), ('ẳ', 'ẳ'), ('ẵ', 'ẵ'),
+  ('ặ', 'ặ'), ('ẹ', 'ẹ'), ('ẻ', 'ẻ'), ('ẽ', 'ẽ'),
+  ('ế', 'ế'), ('ề', 'ề'), ('ể', 'ể'), ('ễ', 'ễ'),
+  ('ệ', 'ệ'), ('ỉ', 'ỉ'), ('ị', 'ị'), ('ọ', 'ọ'),
+  ('ỏ', 'ỏ'), ('ố', 'ố'), ('ồ', 'ồ'), ('ổ', 'ổ'),
+  ('ỗ', 'ỗ'), ('ộ', 'ộ'), ('ớ', 'ớ'), ('ờ', 'ờ'),
+  ('ở', 'ở'), ('ỡ', 'ỡ'), ('ợ', 'ợ'), ('ụ', 'ụ'),
+  ('ủ', 'ủ'), ('ứ', 'ứ'), ('ừ', 'ừ'), ('ử', 'ử'),
+  ('ữ', 'ữ'), ('ự', 'ự'), ('ỳ', 'ỳ'), ('ỵ', 'ỵ'),
+  ('ỷ', 'ỷ'), ('ỹ', 'ỹ'), ('ỻ', 'ỻ'), ('ỽ', 'ỽ'),
+  ('ỿ', 'ἇ'), ('ἐ', 'ἕ'), ('ἠ', 'ἧ'), ('ἰ', 'ἷ'),
+  ('ὀ', 'ὅ'), ('ὐ', 'ὗ'), ('ὠ', 'ὧ'), ('ὰ', 'ώ'),
+  ('ᾀ', 'ᾇ'), ('ᾐ', 'ᾗ'), ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'),
+  ('ᾶ', 'ᾷ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'ῗ'), ('ῠ', 'ῧ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῷ'), ('ℊ', 'ℊ'), ('ℎ', 'ℏ'), ('ℓ', 'ℓ'),
+  ('ℯ', 'ℯ'), ('ℴ', 'ℴ'), ('ℹ', 'ℹ'), ('ℼ', 'ℽ'),
+  ('ⅆ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('ↄ', 'ↄ'), ('ⰰ', 'ⱞ'),
+  ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'),
+  ('ⱬ', 'ⱬ'), ('ⱱ', 'ⱱ'), ('ⱳ', 'ⱴ'), ('ⱶ', 'ⱻ'),
+  ('ⲁ', 'ⲁ'), ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'),
+  ('ⲉ', 'ⲉ'), ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'),
+  ('ⲑ', 'ⲑ'), ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'),
+  ('ⲙ', 'ⲙ'), ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'),
+  ('ⲡ', 'ⲡ'), ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'),
+  ('ⲩ', 'ⲩ'), ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'),
+  ('ⲱ', 'ⲱ'), ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'),
+  ('ⲹ', 'ⲹ'), ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'),
+  ('ⳁ', 'ⳁ'), ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'),
+  ('ⳉ', 'ⳉ'), ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'),
+  ('ⳑ', 'ⳑ'), ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'),
+  ('ⳙ', 'ⳙ'), ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'),
+  ('ⳡ', 'ⳡ'), ('ⳣ', 'ⳤ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'),
+  ('ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ꙁ', 'ꙁ'), ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'),
+  ('ꙉ', 'ꙉ'), ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'),
+  ('ꙑ', 'ꙑ'), ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'),
+  ('ꙙ', 'ꙙ'), ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'),
+  ('ꙡ', 'ꙡ'), ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'),
+  ('ꙩ', 'ꙩ'), ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'),
+  ('ꚃ', 'ꚃ'), ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'),
+  ('ꚋ', 'ꚋ'), ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'),
+  ('ꚓ', 'ꚓ'), ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'),
+  ('ꚛ', 'ꚛ'), ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'),
+  ('ꜩ', 'ꜩ'), ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜱ'),
+  ('ꜳ', 'ꜳ'), ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'),
+  ('ꜻ', 'ꜻ'), ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'),
+  ('ꝃ', 'ꝃ'), ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'),
+  ('ꝋ', 'ꝋ'), ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'),
+  ('ꝓ', 'ꝓ'), ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'),
+  ('ꝛ', 'ꝛ'), ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'),
+  ('ꝣ', 'ꝣ'), ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'),
+  ('ꝫ', 'ꝫ'), ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝱ', 'ꝸ'),
+  ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'), ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'),
+  ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'), ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'),
+  ('ꞎ', 'ꞎ'), ('ꞑ', 'ꞑ'), ('ꞓ', 'ꞕ'), ('ꞗ', 'ꞗ'),
+  ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'), ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'),
+  ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'), ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'),
+  ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'), ('ꞷ', 'ꞷ'), ('ꟺ', 'ꟺ'),
+  ('ꬰ', 'ꭚ'), ('ꭠ', 'ꭥ'), ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'),
+  ('ﬓ', 'ﬗ'), ('ａ', 'ｚ'), ('𐐨', '𐑏'), ('𐓘', '𐓻'),
+  ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𝐚', '𝐳'), ('𝑎', '𝑔'),
+  ('𝑖', '𝑧'), ('𝒂', '𝒛'), ('𝒶', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝓏'), ('𝓪', '𝔃'), ('𝔞', '𝔷'),
+  ('𝕒', '𝕫'), ('𝖆', '𝖟'), ('𝖺', '𝗓'), ('𝗮', '𝘇'),
+  ('𝘢', '𝘻'), ('𝙖', '𝙯'), ('𝚊', '𝚥'), ('𝛂', '𝛚'),
+  ('𝛜', '𝛡'), ('𝛼', '𝜔'), ('𝜖', '𝜛'), ('𝜶', '𝝎'),
+  ('𝝐', '𝝕'), ('𝝰', '𝞈'), ('𝞊', '𝞏'), ('𝞪', '𝟂'),
+  ('𝟄', '𝟉'), ('𝟋', '𝟋'), ('𞤢', '𞥃'),
+];
+
+pub const MARK: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҉'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ः'), ('ऺ', '़'), ('ा', 'ॏ'), ('॑', 'ॗ'),
+  ('ॢ', 'ॣ'), ('ঁ', 'ঃ'), ('়', '়'), ('া', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', '্'), ('ৗ', 'ৗ'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਃ'), ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ઃ'), ('઼', '઼'), ('ા', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଃ'),
+  ('଼', '଼'), ('ା', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'),
+  ('ౢ', 'ౣ'), ('ಁ', 'ಃ'), ('಼', '಼'), ('ಾ', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'),
+  ('ഀ', 'ഃ'), ('഻', '഼'), ('ാ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', '്'), ('ൗ', 'ൗ'), ('ൢ', 'ൣ'), ('ං', 'ඃ'),
+  ('්', '්'), ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'),
+  ('ෲ', 'ෳ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('็', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('່', 'ໍ'),
+  ('༘', '༙'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', '༿'), ('ཱ', '྄'), ('྆', '྇'), ('ྍ', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ါ', 'ှ'), ('ၖ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၢ', 'ၤ'), ('ၧ', 'ၭ'), ('ၱ', 'ၴ'),
+  ('ႂ', 'ႍ'), ('ႏ', 'ႏ'), ('ႚ', 'ႝ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '៓'), ('៝', '៝'), ('᠋', '᠍'), ('ᢅ', 'ᢆ'),
+  ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('ᨗ', 'ᨛ'),
+  ('ᩕ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᩿'), ('᪰', '᪾'),
+  ('ᬀ', 'ᬄ'), ('᬴', '᭄'), ('᭫', '᭳'), ('ᮀ', 'ᮂ'),
+  ('ᮡ', 'ᮭ'), ('᯦', '᯳'), ('ᰤ', '᰷'), ('᳐', '᳒'),
+  ('᳔', '᳨'), ('᳭', '᳭'), ('ᳲ', '᳴'), ('᳷', '᳹'),
+  ('᷀', '᷹'), ('᷻', '᷿'), ('⃐', '⃰'), ('⳯', '⳱'),
+  ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〯'), ('゙', '゚'),
+  ('꙯', '꙲'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'), ('꛰', '꛱'),
+  ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠣ', 'ꠧ'),
+  ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'),
+  ('ꥇ', '꥓'), ('ꦀ', 'ꦃ'), ('꦳', '꧀'), ('ꧥ', 'ꧥ'),
+  ('ꨩ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩍ'), ('ꩻ', 'ꩽ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫫ', 'ꫯ'), ('ꫵ', '꫶'), ('ꯣ', 'ꯪ'),
+  ('꯬', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'), ('︠', '︯'),
+  ('𐇽', '𐇽'), ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐫥', '𐫦'), ('𑀀', '𑀂'), ('𑀸', '𑁆'), ('𑁿', '𑂂'),
+  ('𑂰', '𑂺'), ('𑄀', '𑄂'), ('𑄧', '𑄴'), ('𑅳', '𑅳'),
+  ('𑆀', '𑆂'), ('𑆳', '𑇀'), ('𑇊', '𑇌'), ('𑈬', '𑈷'),
+  ('𑈾', '𑈾'), ('𑋟', '𑋪'), ('𑌀', '𑌃'), ('𑌼', '𑌼'),
+  ('𑌾', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍗', '𑍗'),
+  ('𑍢', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐵', '𑑆'),
+  ('𑒰', '𑓃'), ('𑖯', '𑖵'), ('𑖸', '𑗀'), ('𑗜', '𑗝'),
+  ('𑘰', '𑙀'), ('𑚫', '𑚷'), ('𑜝', '𑜫'), ('𑨁', '𑨊'),
+  ('𑨳', '𑨹'), ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩛'),
+  ('𑪊', '𑪙'), ('𑰯', '𑰶'), ('𑰸', '𑰿'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'),
+  ('𖽑', '𖽾'), ('𖾏', '𖾒'), ('𛲝', '𛲞'), ('𝅥', '𝅩'),
+  ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'),
+  ('𝉂', '𝉄'), ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'),
+  ('𝪄', '𝪄'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞣐', '𞣖'), ('𞥄', '𞥊'), ('󠄀', '󠇯'),
+];
+
+pub const MATH_SYMBOL: &'static [(char, char)] = &[
+  ('+', '+'), ('<', '>'), ('|', '|'), ('~', '~'), ('¬', '¬'), ('±', '±'),
+  ('×', '×'), ('÷', '÷'), ('϶', '϶'), ('؆', '؈'), ('⁄', '⁄'),
+  ('⁒', '⁒'), ('⁺', '⁼'), ('₊', '₌'), ('℘', '℘'),
+  ('⅀', '⅄'), ('⅋', '⅋'), ('←', '↔'), ('↚', '↛'),
+  ('↠', '↠'), ('↣', '↣'), ('↦', '↦'), ('↮', '↮'),
+  ('⇎', '⇏'), ('⇒', '⇒'), ('⇔', '⇔'), ('⇴', '⋿'),
+  ('⌠', '⌡'), ('⍼', '⍼'), ('⎛', '⎳'), ('⏜', '⏡'),
+  ('▷', '▷'), ('◁', '◁'), ('◸', '◿'), ('♯', '♯'),
+  ('⟀', '⟄'), ('⟇', '⟥'), ('⟰', '⟿'), ('⤀', '⦂'),
+  ('⦙', '⧗'), ('⧜', '⧻'), ('⧾', '⫿'), ('⬰', '⭄'),
+  ('⭇', '⭌'), ('﬩', '﬩'), ('﹢', '﹢'), ('﹤', '﹦'),
+  ('＋', '＋'), ('＜', '＞'), ('｜', '｜'), ('～', '～'),
+  ('￢', '￢'), ('￩', '￬'), ('𝛁', '𝛁'), ('𝛛', '𝛛'),
+  ('𝛻', '𝛻'), ('𝜕', '𝜕'), ('𝜵', '𝜵'), ('𝝏', '𝝏'),
+  ('𝝯', '𝝯'), ('𝞉', '𝞉'), ('𝞩', '𝞩'), ('𝟃', '𝟃'),
+  ('𞻰', '𞻱'),
+];
+
+pub const MODIFIER_LETTER: &'static [(char, char)] = &[
+  ('ʰ', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'),
+  ('ʹ', 'ʹ'), ('ͺ', 'ͺ'), ('ՙ', 'ՙ'), ('ـ', 'ـ'), ('ۥ', 'ۦ'),
+  ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'),
+  ('ॱ', 'ॱ'), ('ๆ', 'ๆ'), ('ໆ', 'ໆ'), ('ჼ', 'ჼ'),
+  ('ៗ', 'ៗ'), ('ᡃ', 'ᡃ'), ('ᪧ', 'ᪧ'), ('ᱸ', 'ᱽ'),
+  ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', 'ᶿ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ⱼ', 'ⱽ'), ('ⵯ', 'ⵯ'),
+  ('ⸯ', 'ⸯ'), ('々', '々'), ('〱', '〵'), ('〻', '〻'),
+  ('ゝ', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'), ('ꓸ', 'ꓽ'),
+  ('ꘌ', 'ꘌ'), ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚝ'), ('ꜗ', 'ꜟ'),
+  ('ꝰ', 'ꝰ'), ('ꞈ', 'ꞈ'), ('ꟸ', 'ꟹ'), ('ꧏ', 'ꧏ'),
+  ('ꧦ', 'ꧦ'), ('ꩰ', 'ꩰ'), ('ꫝ', 'ꫝ'), ('ꫳ', 'ꫴ'),
+  ('ꭜ', 'ꭟ'), ('ｰ', 'ｰ'), ('ﾞ', 'ﾟ'), ('𖭀', '𖭃'),
+  ('𖾓', '𖾟'), ('𖿠', '𖿡'),
+];
+
+pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[
+  ('^', '^'), ('`', '`'), ('¨', '¨'), ('¯', '¯'), ('´', '´'),
+  ('¸', '¸'), ('˂', '˅'), ('˒', '˟'), ('˥', '˫'), ('˭', '˭'),
+  ('˯', '˿'), ('͵', '͵'), ('΄', '΅'), ('᾽', '᾽'), ('᾿', '῁'),
+  ('῍', '῏'), ('῝', '῟'), ('῭', '`'), ('´', '῾'),
+  ('゛', '゜'), ('꜀', '꜖'), ('꜠', '꜡'), ('꞉', '꞊'),
+  ('꭛', '꭛'), ('﮲', '﯁'), ('＾', '＾'), ('｀', '｀'),
+  ('￣', '￣'), ('🏻', '🏿'),
+];
+
+pub const NONSPACING_MARK: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҇'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ं'), ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'),
+  ('्', '्'), ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('ু', 'ৄ'), ('্', '্'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਂ'), ('਼', '਼'), ('ੁ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ં'), ('઼', '઼'), ('ુ', 'ૅ'), ('ે', 'ૈ'),
+  ('્', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଁ'),
+  ('଼', '଼'), ('ି', 'ି'), ('ୁ', 'ୄ'), ('୍', '୍'),
+  ('ୖ', 'ୖ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ீ', 'ீ'),
+  ('்', '்'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೆ', 'ೆ'), ('ೌ', '್'),
+  ('ೢ', 'ೣ'), ('ഀ', 'ഁ'), ('഻', '഼'), ('ു', 'ൄ'),
+  ('്', '്'), ('ൢ', 'ൣ'), ('්', '්'), ('ි', 'ු'),
+  ('ූ', 'ූ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('็', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('່', 'ໍ'),
+  ('༘', '༙'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('ཱ', 'ཾ'), ('ྀ', '྄'), ('྆', '྇'), ('ྍ', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ိ', 'ူ'), ('ဲ', '့'),
+  ('္', '်'), ('ွ', 'ှ'), ('ၘ', 'ၙ'), ('ၞ', 'ၠ'),
+  ('ၱ', 'ၴ'), ('ႂ', 'ႂ'), ('ႅ', 'ႆ'), ('ႍ', 'ႍ'),
+  ('ႝ', 'ႝ'), ('፝', '፟'), ('ᜒ', '᜔'), ('ᜲ', '᜴'),
+  ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'), ('឴', '឵'), ('ិ', 'ួ'),
+  ('ំ', 'ំ'), ('៉', '៓'), ('៝', '៝'), ('᠋', '᠍'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'),
+  ('ᤲ', 'ᤲ'), ('᤹', '᤻'), ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'),
+  ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'), ('᩠', '᩠'), ('ᩢ', 'ᩢ'),
+  ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'), ('᩿', '᩿'), ('᪰', '᪽'),
+  ('ᬀ', 'ᬃ'), ('᬴', '᬴'), ('ᬶ', 'ᬺ'), ('ᬼ', 'ᬼ'),
+  ('ᭂ', 'ᭂ'), ('᭫', '᭳'), ('ᮀ', 'ᮁ'), ('ᮢ', 'ᮥ'),
+  ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'), ('᯦', '᯦'), ('ᯨ', 'ᯩ'),
+  ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'), ('ᰬ', 'ᰳ'), ('ᰶ', '᰷'),
+  ('᳐', '᳒'), ('᳔', '᳠'), ('᳢', '᳨'), ('᳭', '᳭'),
+  ('᳴', '᳴'), ('᳸', '᳹'), ('᷀', '᷹'), ('᷻', '᷿'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('⳯', '⳱'),
+  ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〭'), ('゙', '゚'),
+  ('꙯', '꙯'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'), ('꛰', '꛱'),
+  ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠥ', 'ꠦ'),
+  ('꣄', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'), ('ꥇ', 'ꥑ'),
+  ('ꦀ', 'ꦂ'), ('꦳', '꦳'), ('ꦶ', 'ꦹ'), ('ꦼ', 'ꦼ'),
+  ('ꧥ', 'ꧥ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'), ('ꨵ', 'ꨶ'),
+  ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩼ', 'ꩼ'), ('ꪰ', 'ꪰ'),
+  ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'), ('꫁', '꫁'),
+  ('ꫬ', 'ꫭ'), ('꫶', '꫶'), ('ꯥ', 'ꯥ'), ('ꯨ', 'ꯨ'),
+  ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'), ('︠', '︯'),
+  ('𐇽', '𐇽'), ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐫥', '𐫦'), ('𑀁', '𑀁'), ('𑀸', '𑁆'), ('𑁿', '𑂁'),
+  ('𑂳', '𑂶'), ('𑂹', '𑂺'), ('𑄀', '𑄂'), ('𑄧', '𑄫'),
+  ('𑄭', '𑄴'), ('𑅳', '𑅳'), ('𑆀', '𑆁'), ('𑆶', '𑆾'),
+  ('𑇊', '𑇌'), ('𑈯', '𑈱'), ('𑈴', '𑈴'), ('𑈶', '𑈷'),
+  ('𑈾', '𑈾'), ('𑋟', '𑋟'), ('𑋣', '𑋪'), ('𑌀', '𑌁'),
+  ('𑌼', '𑌼'), ('𑍀', '𑍀'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+  ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'), ('𑒳', '𑒸'),
+  ('𑒺', '𑒺'), ('𑒿', '𑓀'), ('𑓂', '𑓃'), ('𑖲', '𑖵'),
+  ('𑖼', '𑖽'), ('𑖿', '𑗀'), ('𑗜', '𑗝'), ('𑘳', '𑘺'),
+  ('𑘽', '𑘽'), ('𑘿', '𑙀'), ('𑚫', '𑚫'), ('𑚭', '𑚭'),
+  ('𑚰', '𑚵'), ('𑚷', '𑚷'), ('𑜝', '𑜟'), ('𑜢', '𑜥'),
+  ('𑜧', '𑜫'), ('𑨁', '𑨆'), ('𑨉', '𑨊'), ('𑨳', '𑨸'),
+  ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩖'), ('𑩙', '𑩛'),
+  ('𑪊', '𑪖'), ('𑪘', '𑪙'), ('𑰰', '𑰶'), ('𑰸', '𑰽'),
+  ('𑰿', '𑰿'), ('𑲒', '𑲧'), ('𑲪', '𑲰'), ('𑲲', '𑲳'),
+  ('𑲵', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'),
+  ('𖾏', '𖾒'), ('𛲝', '𛲞'), ('𝅧', '𝅩'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'), ('𝨀', '𝨶'),
+  ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'), ('𝪛', '𝪟'),
+  ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'), ('𞥄', '𞥊'),
+  ('󠄀', '󠇯'),
+];
+
+pub const NUMBER: &'static [(char, char)] = &[
+  ('0', '9'), ('²', '³'), ('¹', '¹'), ('¼', '¾'), ('٠', '٩'),
+  ('۰', '۹'), ('߀', '߉'), ('०', '९'), ('০', '৯'), ('৴', '৹'),
+  ('੦', '੯'), ('૦', '૯'), ('୦', '୯'), ('୲', '୷'),
+  ('௦', '௲'), ('౦', '౯'), ('౸', '౾'), ('೦', '೯'),
+  ('൘', '൞'), ('൦', '൸'), ('෦', '෯'), ('๐', '๙'),
+  ('໐', '໙'), ('༠', '༳'), ('၀', '၉'), ('႐', '႙'),
+  ('፩', '፼'), ('ᛮ', 'ᛰ'), ('០', '៩'), ('៰', '៹'),
+  ('᠐', '᠙'), ('᥆', '᥏'), ('᧐', '᧚'), ('᪀', '᪉'),
+  ('᪐', '᪙'), ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'),
+  ('᱐', '᱙'), ('⁰', '⁰'), ('⁴', '⁹'), ('₀', '₉'),
+  ('⅐', 'ↂ'), ('ↅ', '↉'), ('①', '⒛'), ('⓪', '⓿'),
+  ('❶', '➓'), ('⳽', '⳽'), ('〇', '〇'), ('〡', '〩'),
+  ('〸', '〺'), ('㆒', '㆕'), ('㈠', '㈩'), ('㉈', '㉏'),
+  ('㉑', '㉟'), ('㊀', '㊉'), ('㊱', '㊿'), ('꘠', '꘩'),
+  ('ꛦ', 'ꛯ'), ('꠰', '꠵'), ('꣐', '꣙'), ('꤀', '꤉'),
+  ('꧐', '꧙'), ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'),
+  ('０', '９'), ('𐄇', '𐄳'), ('𐅀', '𐅸'), ('𐆊', '𐆋'),
+  ('𐋡', '𐋻'), ('𐌠', '𐌣'), ('𐍁', '𐍁'), ('𐍊', '𐍊'),
+  ('𐏑', '𐏕'), ('𐒠', '𐒩'), ('𐡘', '𐡟'), ('𐡹', '𐡿'),
+  ('𐢧', '𐢯'), ('𐣻', '𐣿'), ('𐤖', '𐤛'), ('𐦼', '𐦽'),
+  ('𐧀', '𐧏'), ('𐧒', '𐧿'), ('𐩀', '𐩇'), ('𐩽', '𐩾'),
+  ('𐪝', '𐪟'), ('𐫫', '𐫯'), ('𐭘', '𐭟'), ('𐭸', '𐭿'),
+  ('𐮩', '𐮯'), ('𐳺', '𐳿'), ('𐹠', '𐹾'), ('𑁒', '𑁯'),
+  ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), ('𑇡', '𑇴'),
+  ('𑋰', '𑋹'), ('𑑐', '𑑙'), ('𑓐', '𑓙'), ('𑙐', '𑙙'),
+  ('𑛀', '𑛉'), ('𑜰', '𑜻'), ('𑣠', '𑣲'), ('𑱐', '𑱬'),
+  ('𑵐', '𑵙'), ('𒐀', '𒑮'), ('𖩠', '𖩩'), ('𖭐', '𖭙'),
+  ('𖭛', '𖭡'), ('𝍠', '𝍱'), ('𝟎', '𝟿'), ('𞣇', '𞣏'),
+  ('𞥐', '𞥙'), ('🄀', '🄌'),
+];
+
+pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[
+  ('(', '('), ('[', '['), ('{', '{'), ('༺', '༺'), ('༼', '༼'),
+  ('᚛', '᚛'), ('‚', '‚'), ('„', '„'), ('⁅', '⁅'),
+  ('⁽', '⁽'), ('₍', '₍'), ('⌈', '⌈'), ('⌊', '⌊'),
+  ('〈', '〈'), ('❨', '❨'), ('❪', '❪'), ('❬', '❬'),
+  ('❮', '❮'), ('❰', '❰'), ('❲', '❲'), ('❴', '❴'),
+  ('⟅', '⟅'), ('⟦', '⟦'), ('⟨', '⟨'), ('⟪', '⟪'),
+  ('⟬', '⟬'), ('⟮', '⟮'), ('⦃', '⦃'), ('⦅', '⦅'),
+  ('⦇', '⦇'), ('⦉', '⦉'), ('⦋', '⦋'), ('⦍', '⦍'),
+  ('⦏', '⦏'), ('⦑', '⦑'), ('⦓', '⦓'), ('⦕', '⦕'),
+  ('⦗', '⦗'), ('⧘', '⧘'), ('⧚', '⧚'), ('⧼', '⧼'),
+  ('⸢', '⸢'), ('⸤', '⸤'), ('⸦', '⸦'), ('⸨', '⸨'),
+  ('⹂', '⹂'), ('〈', '〈'), ('《', '《'), ('「', '「'),
+  ('『', '『'), ('【', '【'), ('〔', '〔'), ('〖', '〖'),
+  ('〘', '〘'), ('〚', '〚'), ('〝', '〝'), ('﴿', '﴿'),
+  ('︗', '︗'), ('︵', '︵'), ('︷', '︷'), ('︹', '︹'),
+  ('︻', '︻'), ('︽', '︽'), ('︿', '︿'), ('﹁', '﹁'),
+  ('﹃', '﹃'), ('﹇', '﹇'), ('﹙', '﹙'), ('﹛', '﹛'),
+  ('﹝', '﹝'), ('（', '（'), ('［', '［'), ('｛', '｛'),
+  ('｟', '｟'), ('｢', '｢'),
+];
+
+pub const OTHER: &'static [(char, char)] = &[
+  ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'),
+  ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
+  ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
+  ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
+  ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
+  ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{605}'), ('\u{61c}', '\u{61d}'),
+  ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'),
+  ('\u{7b2}', '\u{7bf}'), ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'),
+  ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'),
+  ('\u{86b}', '\u{89f}'), ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'),
+  ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'),
+  ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'), ('\u{9b1}', '\u{9b1}'),
+  ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'), ('\u{9c5}', '\u{9c6}'),
+  ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'), ('\u{9d8}', '\u{9db}'),
+  ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'), ('\u{9fe}', '\u{a00}'),
+  ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'), ('\u{a11}', '\u{a12}'),
+  ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'), ('\u{a34}', '\u{a34}'),
+  ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'), ('\u{a3d}', '\u{a3d}'),
+  ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'), ('\u{a4e}', '\u{a50}'),
+  ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'), ('\u{a5f}', '\u{a65}'),
+  ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'), ('\u{a8e}', '\u{a8e}'),
+  ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'), ('\u{ab1}', '\u{ab1}'),
+  ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'), ('\u{ac6}', '\u{ac6}'),
+  ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'), ('\u{ad1}', '\u{adf}'),
+  ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'), ('\u{b00}', '\u{b00}'),
+  ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'), ('\u{b11}', '\u{b12}'),
+  ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'), ('\u{b34}', '\u{b34}'),
+  ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'), ('\u{b49}', '\u{b4a}'),
+  ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'), ('\u{b5e}', '\u{b5e}'),
+  ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'), ('\u{b84}', '\u{b84}'),
+  ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'), ('\u{b96}', '\u{b98}'),
+  ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'), ('\u{ba0}', '\u{ba2}'),
+  ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'), ('\u{bba}', '\u{bbd}'),
+  ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'), ('\u{bce}', '\u{bcf}'),
+  ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'), ('\u{bfb}', '\u{bff}'),
+  ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'),
+  ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'), ('\u{c45}', '\u{c45}'),
+  ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'),
+  ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c77}'),
+  ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'), ('\u{c91}', '\u{c91}'),
+  ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'), ('\u{cba}', '\u{cbb}'),
+  ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'),
+  ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'),
+  ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'), ('\u{d04}', '\u{d04}'),
+  ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'),
+  ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'), ('\u{d64}', '\u{d65}'),
+  ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'), ('\u{d97}', '\u{d99}'),
+  ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'), ('\u{dbe}', '\u{dbf}'),
+  ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'), ('\u{dd5}', '\u{dd5}'),
+  ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'), ('\u{df0}', '\u{df1}'),
+  ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'), ('\u{e5c}', '\u{e80}'),
+  ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'), ('\u{e89}', '\u{e89}'),
+  ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'), ('\u{e98}', '\u{e98}'),
+  ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'), ('\u{ea6}', '\u{ea6}'),
+  ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'), ('\u{eba}', '\u{eba}'),
+  ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'),
+  ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'),
+  ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'), ('\u{f98}', '\u{f98}'),
+  ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'), ('\u{fdb}', '\u{fff}'),
+  ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'),
+  ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
+  ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'),
+  ('\u{1259}', '\u{1259}'), ('\u{125e}', '\u{125f}'),
+  ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
+  ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'),
+  ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
+  ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'),
+  ('\u{1311}', '\u{1311}'), ('\u{1316}', '\u{1317}'),
+  ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
+  ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'),
+  ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
+  ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'),
+  ('\u{1715}', '\u{171f}'), ('\u{1737}', '\u{173f}'),
+  ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
+  ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'),
+  ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
+  ('\u{17fa}', '\u{17ff}'), ('\u{180e}', '\u{180f}'),
+  ('\u{181a}', '\u{181f}'), ('\u{1878}', '\u{187f}'),
+  ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
+  ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'),
+  ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
+  ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'),
+  ('\u{19ac}', '\u{19af}'), ('\u{19ca}', '\u{19cf}'),
+  ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
+  ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'),
+  ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
+  ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'),
+  ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'),
+  ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
+  ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}', '\u{1cbf}'),
+  ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'),
+  ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'),
+  ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'),
+  ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'),
+  ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'),
+  ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
+  ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'),
+  ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'),
+  ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'),
+  ('\u{1fff}', '\u{1fff}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'),
+  ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
+  ('\u{209d}', '\u{209f}'), ('\u{20c0}', '\u{20cf}'),
+  ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'),
+  ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'),
+  ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b97}'),
+  ('\u{2bba}', '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'),
+  ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}', '\u{2bff}'),
+  ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
+  ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'),
+  ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'),
+  ('\u{2d68}', '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'),
+  ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}', '\u{2da7}'),
+  ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
+  ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'),
+  ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'),
+  ('\u{2ddf}', '\u{2ddf}'), ('\u{2e4a}', '\u{2e7f}'),
+  ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'),
+  ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
+  ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'),
+  ('\u{3100}', '\u{3104}'), ('\u{312f}', '\u{3130}'),
+  ('\u{318f}', '\u{318f}'), ('\u{31bb}', '\u{31bf}'),
+  ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'),
+  ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
+  ('\u{9feb}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'),
+  ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'),
+  ('\u{a6f8}', '\u{a6ff}'), ('\u{a7af}', '\u{a7af}'),
+  ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}', '\u{a82f}'),
+  ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
+  ('\u{a8c6}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'),
+  ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}', '\u{a95e}'),
+  ('\u{a97d}', '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'),
+  ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}', '\u{a9ff}'),
+  ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
+  ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'),
+  ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}', '\u{ab08}'),
+  ('\u{ab0f}', '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'),
+  ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}', '\u{ab2f}'),
+  ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
+  ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'),
+  ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}', '\u{f8ff}'),
+  ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
+  ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'),
+  ('\u{fb37}', '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'),
+  ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'),
+  ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'),
+  ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
+  ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'),
+  ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'),
+  ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}', '\u{fe6f}'),
+  ('\u{fe75}', '\u{fe75}'), ('\u{fefd}', '\u{ff00}'),
+  ('\u{ffbf}', '\u{ffc1}'), ('\u{ffc8}', '\u{ffc9}'),
+  ('\u{ffd0}', '\u{ffd1}'), ('\u{ffd8}', '\u{ffd9}'),
+  ('\u{ffdd}', '\u{ffdf}'), ('\u{ffe7}', '\u{ffe7}'),
+  ('\u{ffef}', '\u{fffb}'), ('\u{fffe}', '\u{ffff}'),
+  ('\u{1000c}', '\u{1000c}'), ('\u{10027}', '\u{10027}'),
+  ('\u{1003b}', '\u{1003b}'), ('\u{1003e}', '\u{1003e}'),
+  ('\u{1004e}', '\u{1004f}'), ('\u{1005e}', '\u{1007f}'),
+  ('\u{100fb}', '\u{100ff}'), ('\u{10103}', '\u{10106}'),
+  ('\u{10134}', '\u{10136}'), ('\u{1018f}', '\u{1018f}'),
+  ('\u{1019c}', '\u{1019f}'), ('\u{101a1}', '\u{101cf}'),
+  ('\u{101fe}', '\u{1027f}'), ('\u{1029d}', '\u{1029f}'),
+  ('\u{102d1}', '\u{102df}'), ('\u{102fc}', '\u{102ff}'),
+  ('\u{10324}', '\u{1032c}'), ('\u{1034b}', '\u{1034f}'),
+  ('\u{1037b}', '\u{1037f}'), ('\u{1039e}', '\u{1039e}'),
+  ('\u{103c4}', '\u{103c7}'), ('\u{103d6}', '\u{103ff}'),
+  ('\u{1049e}', '\u{1049f}'), ('\u{104aa}', '\u{104af}'),
+  ('\u{104d4}', '\u{104d7}'), ('\u{104fc}', '\u{104ff}'),
+  ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'),
+  ('\u{10570}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'),
+  ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{107ff}'),
+  ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'),
+  ('\u{10836}', '\u{10836}'), ('\u{10839}', '\u{1083b}'),
+  ('\u{1083d}', '\u{1083e}'), ('\u{10856}', '\u{10856}'),
+  ('\u{1089f}', '\u{108a6}'), ('\u{108b0}', '\u{108df}'),
+  ('\u{108f3}', '\u{108f3}'), ('\u{108f6}', '\u{108fa}'),
+  ('\u{1091c}', '\u{1091e}'), ('\u{1093a}', '\u{1093e}'),
+  ('\u{10940}', '\u{1097f}'), ('\u{109b8}', '\u{109bb}'),
+  ('\u{109d0}', '\u{109d1}'), ('\u{10a04}', '\u{10a04}'),
+  ('\u{10a07}', '\u{10a0b}'), ('\u{10a14}', '\u{10a14}'),
+  ('\u{10a18}', '\u{10a18}'), ('\u{10a34}', '\u{10a37}'),
+  ('\u{10a3b}', '\u{10a3e}'), ('\u{10a48}', '\u{10a4f}'),
+  ('\u{10a59}', '\u{10a5f}'), ('\u{10aa0}', '\u{10abf}'),
+  ('\u{10ae7}', '\u{10aea}'), ('\u{10af7}', '\u{10aff}'),
+  ('\u{10b36}', '\u{10b38}'), ('\u{10b56}', '\u{10b57}'),
+  ('\u{10b73}', '\u{10b77}'), ('\u{10b92}', '\u{10b98}'),
+  ('\u{10b9d}', '\u{10ba8}'), ('\u{10bb0}', '\u{10bff}'),
+  ('\u{10c49}', '\u{10c7f}'), ('\u{10cb3}', '\u{10cbf}'),
+  ('\u{10cf3}', '\u{10cf9}'), ('\u{10d00}', '\u{10e5f}'),
+  ('\u{10e7f}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'),
+  ('\u{11070}', '\u{1107e}'), ('\u{110bd}', '\u{110bd}'),
+  ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'),
+  ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'),
+  ('\u{11144}', '\u{1114f}'), ('\u{11177}', '\u{1117f}'),
+  ('\u{111ce}', '\u{111cf}'), ('\u{111e0}', '\u{111e0}'),
+  ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'),
+  ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'),
+  ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'),
+  ('\u{1129e}', '\u{1129e}'), ('\u{112aa}', '\u{112af}'),
+  ('\u{112eb}', '\u{112ef}'), ('\u{112fa}', '\u{112ff}'),
+  ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'),
+  ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'),
+  ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'),
+  ('\u{1133a}', '\u{1133b}'), ('\u{11345}', '\u{11346}'),
+  ('\u{11349}', '\u{1134a}'), ('\u{1134e}', '\u{1134f}'),
+  ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'),
+  ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'),
+  ('\u{11375}', '\u{113ff}'), ('\u{1145a}', '\u{1145a}'),
+  ('\u{1145c}', '\u{1145c}'), ('\u{1145e}', '\u{1147f}'),
+  ('\u{114c8}', '\u{114cf}'), ('\u{114da}', '\u{1157f}'),
+  ('\u{115b6}', '\u{115b7}'), ('\u{115de}', '\u{115ff}'),
+  ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'),
+  ('\u{1166d}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'),
+  ('\u{116ca}', '\u{116ff}'), ('\u{1171a}', '\u{1171c}'),
+  ('\u{1172c}', '\u{1172f}'), ('\u{11740}', '\u{1189f}'),
+  ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{119ff}'),
+  ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'),
+  ('\u{11a9d}', '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'),
+  ('\u{11af9}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'),
+  ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'),
+  ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}', '\u{11c91}'),
+  ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'),
+  ('\u{11d07}', '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'),
+  ('\u{11d37}', '\u{11d39}'), ('\u{11d3b}', '\u{11d3b}'),
+  ('\u{11d3e}', '\u{11d3e}'), ('\u{11d48}', '\u{11d4f}'),
+  ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
+  ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'),
+  ('\u{12544}', '\u{12fff}'), ('\u{1342f}', '\u{143ff}'),
+  ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'),
+  ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'),
+  ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
+  ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'),
+  ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'),
+  ('\u{16b78}', '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'),
+  ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}', '\u{16f8e}'),
+  ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}', '\u{16fff}'),
+  ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'),
+  ('\u{1b11f}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'),
+  ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'),
+  ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'),
+  ('\u{1bca0}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
+  ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'),
+  ('\u{1d1e9}', '\u{1d1ff}'), ('\u{1d246}', '\u{1d2ff}'),
+  ('\u{1d357}', '\u{1d35f}'), ('\u{1d372}', '\u{1d3ff}'),
+  ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}', '\u{1d49d}'),
+  ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'),
+  ('\u{1d4a7}', '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'),
+  ('\u{1d4ba}', '\u{1d4ba}'), ('\u{1d4bc}', '\u{1d4bc}'),
+  ('\u{1d4c4}', '\u{1d4c4}'), ('\u{1d506}', '\u{1d506}'),
+  ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}', '\u{1d515}'),
+  ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'),
+  ('\u{1d53f}', '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'),
+  ('\u{1d547}', '\u{1d549}'), ('\u{1d551}', '\u{1d551}'),
+  ('\u{1d6a6}', '\u{1d6a7}'), ('\u{1d7cc}', '\u{1d7cd}'),
+  ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'),
+  ('\u{1dab0}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'),
+  ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'),
+  ('\u{1e025}', '\u{1e025}'), ('\u{1e02b}', '\u{1e7ff}'),
+  ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'),
+  ('\u{1e94b}', '\u{1e94f}'), ('\u{1e95a}', '\u{1e95d}'),
+  ('\u{1e960}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'),
+  ('\u{1ee20}', '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'),
+  ('\u{1ee25}', '\u{1ee26}'), ('\u{1ee28}', '\u{1ee28}'),
+  ('\u{1ee33}', '\u{1ee33}'), ('\u{1ee38}', '\u{1ee38}'),
+  ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}', '\u{1ee41}'),
+  ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'),
+  ('\u{1ee4a}', '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'),
+  ('\u{1ee50}', '\u{1ee50}'), ('\u{1ee53}', '\u{1ee53}'),
+  ('\u{1ee55}', '\u{1ee56}'), ('\u{1ee58}', '\u{1ee58}'),
+  ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}', '\u{1ee5c}'),
+  ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'),
+  ('\u{1ee63}', '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'),
+  ('\u{1ee6b}', '\u{1ee6b}'), ('\u{1ee73}', '\u{1ee73}'),
+  ('\u{1ee78}', '\u{1ee78}'), ('\u{1ee7d}', '\u{1ee7d}'),
+  ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}', '\u{1ee8a}'),
+  ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'),
+  ('\u{1eeaa}', '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'),
+  ('\u{1eef2}', '\u{1efff}'), ('\u{1f02c}', '\u{1f02f}'),
+  ('\u{1f094}', '\u{1f09f}'), ('\u{1f0af}', '\u{1f0b0}'),
+  ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}', '\u{1f0d0}'),
+  ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'),
+  ('\u{1f12f}', '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'),
+  ('\u{1f1ad}', '\u{1f1e5}'), ('\u{1f203}', '\u{1f20f}'),
+  ('\u{1f23c}', '\u{1f23f}'), ('\u{1f249}', '\u{1f24f}'),
+  ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'),
+  ('\u{1f6d5}', '\u{1f6df}'), ('\u{1f6ed}', '\u{1f6ef}'),
+  ('\u{1f6f9}', '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'),
+  ('\u{1f7d5}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'),
+  ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'),
+  ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8ff}'),
+  ('\u{1f90c}', '\u{1f90f}'), ('\u{1f93f}', '\u{1f93f}'),
+  ('\u{1f94d}', '\u{1f94f}'), ('\u{1f96c}', '\u{1f97f}'),
+  ('\u{1f998}', '\u{1f9bf}'), ('\u{1f9c1}', '\u{1f9cf}'),
+  ('\u{1f9e7}', '\u{1ffff}'), ('\u{2a6d7}', '\u{2a6ff}'),
+  ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'),
+  ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'),
+  ('\u{2fa1e}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}'),
+];
+
+pub const OTHER_LETTER: &'static [(char, char)] = &[
+  ('ª', 'ª'), ('º', 'º'), ('ƻ', 'ƻ'), ('ǀ', 'ǃ'), ('ʔ', 'ʔ'),
+  ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ؿ'), ('ف', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'),
+  ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('ߊ', 'ߪ'),
+  ('ࠀ', 'ࠕ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
+  ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'),
+  ('क़', 'ॡ'), ('ॲ', 'ঀ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
+  ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
+  ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
+  ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('ੲ', 'ੴ'),
+  ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'), ('ૐ', 'ૐ'),
+  ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
+  ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
+  ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'), ('ୱ', 'ୱ'),
+  ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
+  ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'),
+  ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'), ('ൺ', 'ൿ'),
+  ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'),
+  ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'), ('เ', 'ๅ'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ະ'),
+  ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'), ('ໜ', 'ໟ'),
+  ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'), ('ྈ', 'ྌ'),
+  ('က', 'ဪ'), ('ဿ', 'ဿ'), ('ၐ', 'ၕ'), ('ၚ', 'ၝ'),
+  ('ၡ', 'ၡ'), ('ၥ', 'ၦ'), ('ၮ', 'ၰ'), ('ၵ', 'ႁ'),
+  ('ႎ', 'ႎ'), ('ა', 'ჺ'), ('ჽ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'), ('ᐁ', 'ᙬ'),
+  ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'),
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡂ'), ('ᡄ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'),
+  ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'),
+  ('ᨠ', 'ᩔ'), ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'),
+  ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'),
+  ('ᱚ', 'ᱷ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ℵ', 'ℸ'), ('ⴰ', 'ⵧ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('〆', '〆'),
+  ('〼', '〼'), ('ぁ', 'ゖ'), ('ゟ', 'ゟ'), ('ァ', 'ヺ'),
+  ('ヿ', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'),
+  ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꀔ'),
+  ('ꀖ', 'ꒌ'), ('ꓐ', 'ꓷ'), ('ꔀ', 'ꘋ'), ('ꘐ', 'ꘟ'),
+  ('ꘪ', 'ꘫ'), ('ꙮ', 'ꙮ'), ('ꚠ', 'ꛥ'), ('ꞏ', 'ꞏ'),
+  ('ꟷ', 'ꟷ'), ('ꟻ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧠ', 'ꧤ'), ('ꧧ', 'ꧯ'),
+  ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'),
+  ('ꩠ', 'ꩯ'), ('ꩱ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫜ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫲ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꯀ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('ｦ', 'ｯ'), ('ｱ', 'ﾝ'), ('ﾠ', 'ﾾ'),
+  ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍀'), ('𐍂', '𐍉'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'),
+  ('𐨀', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𑀃', '𑀷'), ('𑂃', '𑂯'), ('𑃐', '𑃨'),
+  ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'),
+  ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'),
+  ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'),
+  ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'),
+  ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'),
+  ('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑫀', '𑫸'), ('𑰀', '𑰈'),
+  ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑴀', '𑴆'),
+  ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𒀀', '𒎙'),
+  ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'),
+  ('𖩀', '𖩞'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𞠀', '𞣄'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const OTHER_NUMBER: &'static [(char, char)] = &[
+  ('²', '³'), ('¹', '¹'), ('¼', '¾'), ('৴', '৹'), ('୲', '୷'),
+  ('௰', '௲'), ('౸', '౾'), ('൘', '൞'), ('൰', '൸'),
+  ('༪', '༳'), ('፩', '፼'), ('៰', '៹'), ('᧚', '᧚'),
+  ('⁰', '⁰'), ('⁴', '⁹'), ('₀', '₉'), ('⅐', '⅟'),
+  ('↉', '↉'), ('①', '⒛'), ('⓪', '⓿'), ('❶', '➓'),
+  ('⳽', '⳽'), ('㆒', '㆕'), ('㈠', '㈩'), ('㉈', '㉏'),
+  ('㉑', '㉟'), ('㊀', '㊉'), ('㊱', '㊿'), ('꠰', '꠵'),
+  ('𐄇', '𐄳'), ('𐅵', '𐅸'), ('𐆊', '𐆋'), ('𐋡', '𐋻'),
+  ('𐌠', '𐌣'), ('𐡘', '𐡟'), ('𐡹', '𐡿'), ('𐢧', '𐢯'),
+  ('𐣻', '𐣿'), ('𐤖', '𐤛'), ('𐦼', '𐦽'), ('𐧀', '𐧏'),
+  ('𐧒', '𐧿'), ('𐩀', '𐩇'), ('𐩽', '𐩾'), ('𐪝', '𐪟'),
+  ('𐫫', '𐫯'), ('𐭘', '𐭟'), ('𐭸', '𐭿'), ('𐮩', '𐮯'),
+  ('𐳺', '𐳿'), ('𐹠', '𐹾'), ('𑁒', '𑁥'), ('𑇡', '𑇴'),
+  ('𑜺', '𑜻'), ('𑣪', '𑣲'), ('𑱚', '𑱬'), ('𖭛', '𖭡'),
+  ('𝍠', '𝍱'), ('𞣇', '𞣏'), ('🄀', '🄌'),
+];
+
+pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '#'), ('%', '\''), ('*', '*'), (',', ','), ('.', '/'), (':', ';'),
+  ('?', '@'), ('\\', '\\'), ('¡', '¡'), ('§', '§'), ('¶', '·'),
+  ('¿', '¿'), (';', ';'), ('·', '·'), ('՚', '՟'), ('։', '։'),
+  ('׀', '׀'), ('׃', '׃'), ('׆', '׆'), ('׳', '״'), ('؉', '؊'),
+  ('،', '؍'), ('؛', '؛'), ('؞', '؟'), ('٪', '٭'), ('۔', '۔'),
+  ('܀', '܍'), ('߷', '߹'), ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'),
+  ('॰', '॰'), ('৽', '৽'), ('૰', '૰'), ('෴', '෴'),
+  ('๏', '๏'), ('๚', '๛'), ('༄', '༒'), ('༔', '༔'),
+  ('྅', '྅'), ('࿐', '࿔'), ('࿙', '࿚'), ('၊', '၏'),
+  ('჻', '჻'), ('፠', '፨'), ('᙭', '᙮'), ('᛫', '᛭'),
+  ('᜵', '᜶'), ('។', '៖'), ('៘', '៚'), ('᠀', '᠅'),
+  ('᠇', '᠊'), ('᥄', '᥅'), ('᨞', '᨟'), ('᪠', '᪦'),
+  ('᪨', '᪭'), ('᭚', '᭠'), ('᯼', '᯿'), ('᰻', '᰿'),
+  ('᱾', '᱿'), ('᳀', '᳇'), ('᳓', '᳓'), ('‖', '‗'),
+  ('†', '‧'), ('‰', '‸'), ('※', '‾'), ('⁁', '⁃'),
+  ('⁇', '⁑'), ('⁓', '⁓'), ('⁕', '⁞'), ('⳹', '⳼'),
+  ('⳾', '⳿'), ('⵰', '⵰'), ('⸀', '⸁'), ('⸆', '⸈'),
+  ('⸋', '⸋'), ('⸎', '⸖'), ('⸘', '⸙'), ('⸛', '⸛'),
+  ('⸞', '⸟'), ('⸪', '⸮'), ('⸰', '⸹'), ('⸼', '⸿'),
+  ('⹁', '⹁'), ('⹃', '⹉'), ('、', '〃'), ('〽', '〽'),
+  ('・', '・'), ('꓾', '꓿'), ('꘍', '꘏'), ('꙳', '꙳'),
+  ('꙾', '꙾'), ('꛲', '꛷'), ('꡴', '꡷'), ('꣎', '꣏'),
+  ('꣸', '꣺'), ('꣼', '꣼'), ('꤮', '꤯'), ('꥟', '꥟'),
+  ('꧁', '꧍'), ('꧞', '꧟'), ('꩜', '꩟'), ('꫞', '꫟'),
+  ('꫰', '꫱'), ('꯫', '꯫'), ('︐', '︖'), ('︙', '︙'),
+  ('︰', '︰'), ('﹅', '﹆'), ('﹉', '﹌'), ('﹐', '﹒'),
+  ('﹔', '﹗'), ('﹟', '﹡'), ('﹨', '﹨'), ('﹪', '﹫'),
+  ('！', '＃'), ('％', '＇'), ('＊', '＊'), ('，', '，'),
+  ('．', '／'), ('：', '；'), ('？', '＠'), ('＼', '＼'),
+  ('｡', '｡'), ('､', '･'), ('𐄀', '𐄂'), ('𐎟', '𐎟'),
+  ('𐏐', '𐏐'), ('𐕯', '𐕯'), ('𐡗', '𐡗'), ('𐤟', '𐤟'),
+  ('𐤿', '𐤿'), ('𐩐', '𐩘'), ('𐩿', '𐩿'), ('𐫰', '𐫶'),
+  ('𐬹', '𐬿'), ('𐮙', '𐮜'), ('𑁇', '𑁍'), ('𑂻', '𑂼'),
+  ('𑂾', '𑃁'), ('𑅀', '𑅃'), ('𑅴', '𑅵'), ('𑇅', '𑇉'),
+  ('𑇍', '𑇍'), ('𑇛', '𑇛'), ('𑇝', '𑇟'), ('𑈸', '𑈽'),
+  ('𑊩', '𑊩'), ('𑑋', '𑑏'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+  ('𑓆', '𑓆'), ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'),
+  ('𑜼', '𑜾'), ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'),
+  ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'),
+  ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), ('𛲟', '𛲟'),
+  ('𝪇', '𝪋'), ('𞥞', '𞥟'),
+];
+
+pub const OTHER_SYMBOL: &'static [(char, char)] = &[
+  ('¦', '¦'), ('©', '©'), ('®', '®'), ('°', '°'), ('҂', '҂'),
+  ('֍', '֎'), ('؎', '؏'), ('۞', '۞'), ('۩', '۩'), ('۽', '۾'),
+  ('߶', '߶'), ('৺', '৺'), ('୰', '୰'), ('௳', '௸'),
+  ('௺', '௺'), ('౿', '౿'), ('൏', '൏'), ('൹', '൹'),
+  ('༁', '༃'), ('༓', '༓'), ('༕', '༗'), ('༚', '༟'),
+  ('༴', '༴'), ('༶', '༶'), ('༸', '༸'), ('྾', '࿅'),
+  ('࿇', '࿌'), ('࿎', '࿏'), ('࿕', '࿘'), ('႞', '႟'),
+  ('᎐', '᎙'), ('᥀', '᥀'), ('᧞', '᧿'), ('᭡', '᭪'),
+  ('᭴', '᭼'), ('℀', '℁'), ('℃', '℆'), ('℈', '℉'),
+  ('℔', '℔'), ('№', '℗'), ('℞', '℣'), ('℥', '℥'),
+  ('℧', '℧'), ('℩', '℩'), ('℮', '℮'), ('℺', '℻'),
+  ('⅊', '⅊'), ('⅌', '⅍'), ('⅏', '⅏'), ('↊', '↋'),
+  ('↕', '↙'), ('↜', '↟'), ('↡', '↢'), ('↤', '↥'),
+  ('↧', '↭'), ('↯', '⇍'), ('⇐', '⇑'), ('⇓', '⇓'),
+  ('⇕', '⇳'), ('⌀', '⌇'), ('⌌', '⌟'), ('⌢', '⌨'),
+  ('⌫', '⍻'), ('⍽', '⎚'), ('⎴', '⏛'), ('⏢', '␦'),
+  ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '▶'), ('▸', '◀'),
+  ('◂', '◷'), ('☀', '♮'), ('♰', '❧'), ('➔', '➿'),
+  ('⠀', '⣿'), ('⬀', '⬯'), ('⭅', '⭆'), ('⭍', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('⳥', '⳪'), ('⺀', '⺙'), ('⺛', '⻳'),
+  ('⼀', '⿕'), ('⿰', '⿻'), ('〄', '〄'), ('〒', '〓'),
+  ('〠', '〠'), ('〶', '〷'), ('〾', '〿'), ('㆐', '㆑'),
+  ('㆖', '㆟'), ('㇀', '㇣'), ('㈀', '㈞'), ('㈪', '㉇'),
+  ('㉐', '㉐'), ('㉠', '㉿'), ('㊊', '㊰'), ('㋀', '㋾'),
+  ('㌀', '㏿'), ('䷀', '䷿'), ('꒐', '꓆'), ('꠨', '꠫'),
+  ('꠶', '꠷'), ('꠹', '꠹'), ('꩷', '꩹'), ('﷽', '﷽'),
+  ('￤', '￤'), ('￨', '￨'), ('￭', '￮'), ('￼', '�'),
+  ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'), ('𐆐', '𐆛'),
+  ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'), ('𐫈', '𐫈'),
+  ('𑜿', '𑜿'), ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'),
+  ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅪', '𝅬'),
+  ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝈀', '𝉁'),
+  ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝠀', '𝧿'), ('𝨷', '𝨺'),
+  ('𝩭', '𝩴'), ('𝩶', '𝪃'), ('𝪅', '𝪆'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'),
+  ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🏺'), ('🐀', '🛔'), ('🛠', '🛬'),
+  ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'),
+  ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'),
+  ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'),
+  ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'),
+];
+
+pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = &[
+  ('\u{2029}', '\u{2029}'),
+];
+
+pub const PRIVATE_USE: &'static [(char, char)] = &[
+  ('\u{e000}', '\u{f8ff}'), ('\u{f0000}', '\u{ffffd}'),
+  ('\u{100000}', '\u{10fffd}'),
+];
+
+pub const PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '#'), ('%', '*'), (',', '/'), (':', ';'), ('?', '@'), ('[', ']'),
+  ('_', '_'), ('{', '{'), ('}', '}'), ('¡', '¡'), ('§', '§'),
+  ('«', '«'), ('¶', '·'), ('»', '»'), ('¿', '¿'), (';', ';'),
+  ('·', '·'), ('՚', '՟'), ('։', '֊'), ('־', '־'), ('׀', '׀'),
+  ('׃', '׃'), ('׆', '׆'), ('׳', '״'), ('؉', '؊'), ('،', '؍'),
+  ('؛', '؛'), ('؞', '؟'), ('٪', '٭'), ('۔', '۔'), ('܀', '܍'),
+  ('߷', '߹'), ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'),
+  ('॰', '॰'), ('৽', '৽'), ('૰', '૰'), ('෴', '෴'),
+  ('๏', '๏'), ('๚', '๛'), ('༄', '༒'), ('༔', '༔'),
+  ('༺', '༽'), ('྅', '྅'), ('࿐', '࿔'), ('࿙', '࿚'),
+  ('၊', '၏'), ('჻', '჻'), ('፠', '፨'), ('᐀', '᐀'),
+  ('᙭', '᙮'), ('᚛', '᚜'), ('᛫', '᛭'), ('᜵', '᜶'),
+  ('។', '៖'), ('៘', '៚'), ('᠀', '᠊'), ('᥄', '᥅'),
+  ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), ('᭚', '᭠'),
+  ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), ('᳀', '᳇'),
+  ('᳓', '᳓'), ('‐', '‧'), ('‰', '⁃'), ('⁅', '⁑'),
+  ('⁓', '⁞'), ('⁽', '⁾'), ('₍', '₎'), ('⌈', '⌋'),
+  ('〈', '〉'), ('❨', '❵'), ('⟅', '⟆'), ('⟦', '⟯'),
+  ('⦃', '⦘'), ('⧘', '⧛'), ('⧼', '⧽'), ('⳹', '⳼'),
+  ('⳾', '⳿'), ('⵰', '⵰'), ('⸀', '⸮'), ('⸰', '⹉'),
+  ('、', '〃'), ('〈', '】'), ('〔', '〟'), ('〰', '〰'),
+  ('〽', '〽'), ('゠', '゠'), ('・', '・'), ('꓾', '꓿'),
+  ('꘍', '꘏'), ('꙳', '꙳'), ('꙾', '꙾'), ('꛲', '꛷'),
+  ('꡴', '꡷'), ('꣎', '꣏'), ('꣸', '꣺'), ('꣼', '꣼'),
+  ('꤮', '꤯'), ('꥟', '꥟'), ('꧁', '꧍'), ('꧞', '꧟'),
+  ('꩜', '꩟'), ('꫞', '꫟'), ('꫰', '꫱'), ('꯫', '꯫'),
+  ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹒'), ('﹔', '﹡'),
+  ('﹣', '﹣'), ('﹨', '﹨'), ('﹪', '﹫'), ('！', '＃'),
+  ('％', '＊'), ('，', '／'), ('：', '；'), ('？', '＠'),
+  ('［', '］'), ('＿', '＿'), ('｛', '｛'), ('｝', '｝'),
+  ('｟', '･'), ('𐄀', '𐄂'), ('𐎟', '𐎟'), ('𐏐', '𐏐'),
+  ('𐕯', '𐕯'), ('𐡗', '𐡗'), ('𐤟', '𐤟'), ('𐤿', '𐤿'),
+  ('𐩐', '𐩘'), ('𐩿', '𐩿'), ('𐫰', '𐫶'), ('𐬹', '𐬿'),
+  ('𐮙', '𐮜'), ('𑁇', '𑁍'), ('𑂻', '𑂼'), ('𑂾', '𑃁'),
+  ('𑅀', '𑅃'), ('𑅴', '𑅵'), ('𑇅', '𑇉'), ('𑇍', '𑇍'),
+  ('𑇛', '𑇛'), ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'),
+  ('𑑋', '𑑏'), ('𑑛', '𑑛'), ('𑑝', '𑑝'), ('𑓆', '𑓆'),
+  ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'), ('𑜼', '𑜾'),
+  ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑱁', '𑱅'),
+  ('𑱰', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'), ('𖫵', '𖫵'),
+  ('𖬷', '𖬻'), ('𖭄', '𖭄'), ('𛲟', '𛲟'), ('𝪇', '𝪋'),
+  ('𞥞', '𞥟'),
+];
+
+pub const SEPARATOR: &'static [(char, char)] = &[
+  (' ', ' '), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
+  ('\u{2000}', '\u{200a}'), ('\u{2028}', '\u{2029}'),
+  ('\u{202f}', '\u{202f}'), ('\u{205f}', '\u{205f}'),
+  ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACE_SEPARATOR: &'static [(char, char)] = &[
+  (' ', ' '), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
+  ('\u{2000}', '\u{200a}'), ('\u{202f}', '\u{202f}'),
+  ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACING_MARK: &'static [(char, char)] = &[
+  ('ः', 'ः'), ('ऻ', 'ऻ'), ('ा', 'ी'), ('ॉ', 'ौ'),
+  ('ॎ', 'ॏ'), ('ং', 'ঃ'), ('া', 'ী'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৗ', 'ৗ'), ('ਃ', 'ਃ'), ('ਾ', 'ੀ'),
+  ('ઃ', 'ઃ'), ('ા', 'ી'), ('ૉ', 'ૉ'), ('ો', 'ૌ'),
+  ('ଂ', 'ଃ'), ('ା', 'ା'), ('ୀ', 'ୀ'), ('େ', 'ୈ'),
+  ('ୋ', 'ୌ'), ('ୗ', 'ୗ'), ('ா', 'ி'), ('ு', 'ூ'),
+  ('ெ', 'ை'), ('ொ', 'ௌ'), ('ௗ', 'ௗ'), ('ఁ', 'ః'),
+  ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), ('ೀ', 'ೄ'),
+  ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), ('ೕ', 'ೖ'), ('ം', 'ഃ'),
+  ('ാ', 'ീ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൗ', 'ൗ'),
+  ('ං', 'ඃ'), ('ා', 'ෑ'), ('ෘ', 'ෟ'), ('ෲ', 'ෳ'),
+  ('༾', '༿'), ('ཿ', 'ཿ'), ('ါ', 'ာ'), ('ေ', 'ေ'),
+  ('း', 'း'), ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ၢ', 'ၤ'),
+  ('ၧ', 'ၭ'), ('ႃ', 'ႄ'), ('ႇ', 'ႌ'), ('ႏ', 'ႏ'),
+  ('ႚ', 'ႜ'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'),
+  ('ᤣ', 'ᤦ'), ('ᤩ', 'ᤫ'), ('ᤰ', 'ᤱ'), ('ᤳ', 'ᤸ'),
+  ('ᨙ', 'ᨚ'), ('ᩕ', 'ᩕ'), ('ᩗ', 'ᩗ'), ('ᩡ', 'ᩡ'),
+  ('ᩣ', 'ᩤ'), ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), ('ᬵ', 'ᬵ'),
+  ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), ('ᭃ', '᭄'), ('ᮂ', 'ᮂ'),
+  ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'), ('ᯧ', 'ᯧ'),
+  ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), ('᯲', '᯳'), ('ᰤ', 'ᰫ'),
+  ('ᰴ', 'ᰵ'), ('᳡', '᳡'), ('ᳲ', 'ᳳ'), ('᳷', '᳷'),
+  ('〮', '〯'), ('ꠣ', 'ꠤ'), ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'),
+  ('ꢴ', 'ꣃ'), ('ꥒ', '꥓'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'),
+  ('ꦺ', 'ꦻ'), ('ꦽ', '꧀'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'),
+  ('ꩍ', 'ꩍ'), ('ꩻ', 'ꩻ'), ('ꩽ', 'ꩽ'), ('ꫫ', 'ꫫ'),
+  ('ꫮ', 'ꫯ'), ('ꫵ', 'ꫵ'), ('ꯣ', 'ꯤ'), ('ꯦ', 'ꯧ'),
+  ('ꯩ', 'ꯪ'), ('꯬', '꯬'), ('𑀀', '𑀀'), ('𑀂', '𑀂'),
+  ('𑂂', '𑂂'), ('𑂰', '𑂲'), ('𑂷', '𑂸'), ('𑄬', '𑄬'),
+  ('𑆂', '𑆂'), ('𑆳', '𑆵'), ('𑆿', '𑇀'), ('𑈬', '𑈮'),
+  ('𑈲', '𑈳'), ('𑈵', '𑈵'), ('𑋠', '𑋢'), ('𑌂', '𑌃'),
+  ('𑌾', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍗', '𑍗'), ('𑍢', '𑍣'), ('𑐵', '𑐷'), ('𑑀', '𑑁'),
+  ('𑑅', '𑑅'), ('𑒰', '𑒲'), ('𑒹', '𑒹'), ('𑒻', '𑒾'),
+  ('𑓁', '𑓁'), ('𑖯', '𑖱'), ('𑖸', '𑖻'), ('𑖾', '𑖾'),
+  ('𑘰', '𑘲'), ('𑘻', '𑘼'), ('𑘾', '𑘾'), ('𑚬', '𑚬'),
+  ('𑚮', '𑚯'), ('𑚶', '𑚶'), ('𑜠', '𑜡'), ('𑜦', '𑜦'),
+  ('𑨇', '𑨈'), ('𑨹', '𑨹'), ('𑩗', '𑩘'), ('𑪗', '𑪗'),
+  ('𑰯', '𑰯'), ('𑰾', '𑰾'), ('𑲩', '𑲩'), ('𑲱', '𑲱'),
+  ('𑲴', '𑲴'), ('𖽑', '𖽾'), ('𝅥', '𝅦'), ('𝅭', '𝅲'),
+];
+
+pub const SYMBOL: &'static [(char, char)] = &[
+  ('$', '$'), ('+', '+'), ('<', '>'), ('^', '^'), ('`', '`'), ('|', '|'),
+  ('~', '~'), ('¢', '¦'), ('¨', '©'), ('¬', '¬'), ('®', '±'),
+  ('´', '´'), ('¸', '¸'), ('×', '×'), ('÷', '÷'), ('˂', '˅'),
+  ('˒', '˟'), ('˥', '˫'), ('˭', '˭'), ('˯', '˿'), ('͵', '͵'),
+  ('΄', '΅'), ('϶', '϶'), ('҂', '҂'), ('֍', '֏'), ('؆', '؈'),
+  ('؋', '؋'), ('؎', '؏'), ('۞', '۞'), ('۩', '۩'), ('۽', '۾'),
+  ('߶', '߶'), ('৲', '৳'), ('৺', '৻'), ('૱', '૱'),
+  ('୰', '୰'), ('௳', '௺'), ('౿', '౿'), ('൏', '൏'),
+  ('൹', '൹'), ('฿', '฿'), ('༁', '༃'), ('༓', '༓'),
+  ('༕', '༗'), ('༚', '༟'), ('༴', '༴'), ('༶', '༶'),
+  ('༸', '༸'), ('྾', '࿅'), ('࿇', '࿌'), ('࿎', '࿏'),
+  ('࿕', '࿘'), ('႞', '႟'), ('᎐', '᎙'), ('៛', '៛'),
+  ('᥀', '᥀'), ('᧞', '᧿'), ('᭡', '᭪'), ('᭴', '᭼'),
+  ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'),
+  ('῭', '`'), ('´', '῾'), ('⁄', '⁄'), ('⁒', '⁒'),
+  ('⁺', '⁼'), ('₊', '₌'), ('₠', '₿'), ('℀', '℁'),
+  ('℃', '℆'), ('℈', '℉'), ('℔', '℔'), ('№', '℘'),
+  ('℞', '℣'), ('℥', '℥'), ('℧', '℧'), ('℩', '℩'),
+  ('℮', '℮'), ('℺', '℻'), ('⅀', '⅄'), ('⅊', '⅍'),
+  ('⅏', '⅏'), ('↊', '↋'), ('←', '⌇'), ('⌌', '⌨'),
+  ('⌫', '␦'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '❧'),
+  ('➔', '⟄'), ('⟇', '⟥'), ('⟰', '⦂'), ('⦙', '⧗'),
+  ('⧜', '⧻'), ('⧾', '⭳'), ('⭶', '⮕'), ('⮘', '⮹'),
+  ('⮽', '⯈'), ('⯊', '⯒'), ('⯬', '⯯'), ('⳥', '⳪'),
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'),
+  ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), ('〶', '〷'),
+  ('〾', '〿'), ('゛', '゜'), ('㆐', '㆑'), ('㆖', '㆟'),
+  ('㇀', '㇣'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'),
+  ('㉠', '㉿'), ('㊊', '㊰'), ('㋀', '㋾'), ('㌀', '㏿'),
+  ('䷀', '䷿'), ('꒐', '꓆'), ('꜀', '꜖'), ('꜠', '꜡'),
+  ('꞉', '꞊'), ('꠨', '꠫'), ('꠶', '꠹'), ('꩷', '꩹'),
+  ('꭛', '꭛'), ('﬩', '﬩'), ('﮲', '﯁'), ('﷼', '﷽'),
+  ('﹢', '﹢'), ('﹤', '﹦'), ('﹩', '﹩'), ('＄', '＄'),
+  ('＋', '＋'), ('＜', '＞'), ('＾', '＾'), ('｀', '｀'),
+  ('｜', '｜'), ('～', '～'), ('￠', '￦'), ('￨', '￮'),
+  ('￼', '�'), ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'),
+  ('𐆐', '𐆛'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'),
+  ('𐫈', '𐫈'), ('𑜿', '𑜿'), ('𖬼', '𖬿'), ('𖭅', '𖭅'),
+  ('𛲜', '𛲜'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'),
+  ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'),
+  ('𝈀', '𝉁'), ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝛁', '𝛁'),
+  ('𝛛', '𝛛'), ('𝛻', '𝛻'), ('𝜕', '𝜕'), ('𝜵', '𝜵'),
+  ('𝝏', '𝝏'), ('𝝯', '𝝯'), ('𝞉', '𝞉'), ('𝞩', '𝞩'),
+  ('𝟃', '𝟃'), ('𝠀', '𝧿'), ('𝨷', '𝨺'), ('𝩭', '𝩴'),
+  ('𝩶', '𝪃'), ('𝪅', '𝪆'), ('𞻰', '𞻱'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'),
+  ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'), ('🛰', '🛸'),
+  ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'), ('🠐', '🡇'),
+  ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), ('🤀', '🤋'),
+  ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'), ('🦀', '🦗'),
+  ('🧀', '🧀'), ('🧐', '🧦'),
+];
+
+pub const TITLECASE_LETTER: &'static [(char, char)] = &[
+  ('ǅ', 'ǅ'), ('ǈ', 'ǈ'), ('ǋ', 'ǋ'), ('ǲ', 'ǲ'), ('ᾈ', 'ᾏ'),
+  ('ᾘ', 'ᾟ'), ('ᾨ', 'ᾯ'), ('ᾼ', 'ᾼ'), ('ῌ', 'ῌ'),
+  ('ῼ', 'ῼ'),
+];
+
+pub const UNASSIGNED: &'static [(char, char)] = &[
+  ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
+  ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
+  ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
+  ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
+  ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{5ff}'), ('\u{61d}', '\u{61d}'),
+  ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'),
+  ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'), ('\u{83f}', '\u{83f}'),
+  ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{89f}'),
+  ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'), ('\u{984}', '\u{984}'),
+  ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'),
+  ('\u{9b1}', '\u{9b1}'), ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'),
+  ('\u{9c5}', '\u{9c6}'), ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'),
+  ('\u{9d8}', '\u{9db}'), ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'),
+  ('\u{9fe}', '\u{a00}'), ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'),
+  ('\u{a11}', '\u{a12}'), ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'),
+  ('\u{a34}', '\u{a34}'), ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'),
+  ('\u{a3d}', '\u{a3d}'), ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'),
+  ('\u{a4e}', '\u{a50}'), ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'),
+  ('\u{a5f}', '\u{a65}'), ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'),
+  ('\u{a8e}', '\u{a8e}'), ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'),
+  ('\u{ab1}', '\u{ab1}'), ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'),
+  ('\u{ac6}', '\u{ac6}'), ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'),
+  ('\u{ad1}', '\u{adf}'), ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'),
+  ('\u{b00}', '\u{b00}'), ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'),
+  ('\u{b11}', '\u{b12}'), ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'),
+  ('\u{b34}', '\u{b34}'), ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'),
+  ('\u{b49}', '\u{b4a}'), ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'),
+  ('\u{b5e}', '\u{b5e}'), ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'),
+  ('\u{b84}', '\u{b84}'), ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'),
+  ('\u{b96}', '\u{b98}'), ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'),
+  ('\u{ba0}', '\u{ba2}'), ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'),
+  ('\u{bba}', '\u{bbd}'), ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'),
+  ('\u{bce}', '\u{bcf}'), ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'),
+  ('\u{bfb}', '\u{bff}'), ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'),
+  ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'),
+  ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'),
+  ('\u{c57}', '\u{c57}'), ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'),
+  ('\u{c70}', '\u{c77}'), ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'),
+  ('\u{c91}', '\u{c91}'), ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'),
+  ('\u{cba}', '\u{cbb}'), ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'),
+  ('\u{cce}', '\u{cd4}'), ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'),
+  ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'),
+  ('\u{d04}', '\u{d04}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'),
+  ('\u{d45}', '\u{d45}'), ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'),
+  ('\u{d64}', '\u{d65}'), ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'),
+  ('\u{d97}', '\u{d99}'), ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'),
+  ('\u{dbe}', '\u{dbf}'), ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'),
+  ('\u{dd5}', '\u{dd5}'), ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'),
+  ('\u{df0}', '\u{df1}'), ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'),
+  ('\u{e5c}', '\u{e80}'), ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'),
+  ('\u{e89}', '\u{e89}'), ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'),
+  ('\u{e98}', '\u{e98}'), ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'),
+  ('\u{ea6}', '\u{ea6}'), ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'),
+  ('\u{eba}', '\u{eba}'), ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'),
+  ('\u{ec7}', '\u{ec7}'), ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'),
+  ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'),
+  ('\u{f98}', '\u{f98}'), ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'),
+  ('\u{fdb}', '\u{fff}'), ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'),
+  ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
+  ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'),
+  ('\u{1259}', '\u{1259}'), ('\u{125e}', '\u{125f}'),
+  ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
+  ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'),
+  ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
+  ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'),
+  ('\u{1311}', '\u{1311}'), ('\u{1316}', '\u{1317}'),
+  ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
+  ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'),
+  ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
+  ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'),
+  ('\u{1715}', '\u{171f}'), ('\u{1737}', '\u{173f}'),
+  ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
+  ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'),
+  ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
+  ('\u{17fa}', '\u{17ff}'), ('\u{180f}', '\u{180f}'),
+  ('\u{181a}', '\u{181f}'), ('\u{1878}', '\u{187f}'),
+  ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
+  ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'),
+  ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
+  ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'),
+  ('\u{19ac}', '\u{19af}'), ('\u{19ca}', '\u{19cf}'),
+  ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
+  ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'),
+  ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
+  ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'),
+  ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'),
+  ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
+  ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}', '\u{1cbf}'),
+  ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'),
+  ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'),
+  ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'),
+  ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'),
+  ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'),
+  ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
+  ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'),
+  ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'),
+  ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'),
+  ('\u{1fff}', '\u{1fff}'), ('\u{2065}', '\u{2065}'),
+  ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
+  ('\u{209d}', '\u{209f}'), ('\u{20c0}', '\u{20cf}'),
+  ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'),
+  ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'),
+  ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b97}'),
+  ('\u{2bba}', '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'),
+  ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}', '\u{2bff}'),
+  ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
+  ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'),
+  ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'),
+  ('\u{2d68}', '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'),
+  ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}', '\u{2da7}'),
+  ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
+  ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'),
+  ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'),
+  ('\u{2ddf}', '\u{2ddf}'), ('\u{2e4a}', '\u{2e7f}'),
+  ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'),
+  ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
+  ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'),
+  ('\u{3100}', '\u{3104}'), ('\u{312f}', '\u{3130}'),
+  ('\u{318f}', '\u{318f}'), ('\u{31bb}', '\u{31bf}'),
+  ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'),
+  ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
+  ('\u{9feb}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'),
+  ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'),
+  ('\u{a6f8}', '\u{a6ff}'), ('\u{a7af}', '\u{a7af}'),
+  ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}', '\u{a82f}'),
+  ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
+  ('\u{a8c6}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'),
+  ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}', '\u{a95e}'),
+  ('\u{a97d}', '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'),
+  ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}', '\u{a9ff}'),
+  ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
+  ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'),
+  ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}', '\u{ab08}'),
+  ('\u{ab0f}', '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'),
+  ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}', '\u{ab2f}'),
+  ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
+  ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'),
+  ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}', '\u{d7ff}'),
+  ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
+  ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'),
+  ('\u{fb37}', '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'),
+  ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'),
+  ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'),
+  ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
+  ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'),
+  ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'),
+  ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}', '\u{fe6f}'),
+  ('\u{fe75}', '\u{fe75}'), ('\u{fefd}', '\u{fefe}'),
+  ('\u{ff00}', '\u{ff00}'), ('\u{ffbf}', '\u{ffc1}'),
+  ('\u{ffc8}', '\u{ffc9}'), ('\u{ffd0}', '\u{ffd1}'),
+  ('\u{ffd8}', '\u{ffd9}'), ('\u{ffdd}', '\u{ffdf}'),
+  ('\u{ffe7}', '\u{ffe7}'), ('\u{ffef}', '\u{fff8}'),
+  ('\u{fffe}', '\u{ffff}'), ('\u{1000c}', '\u{1000c}'),
+  ('\u{10027}', '\u{10027}'), ('\u{1003b}', '\u{1003b}'),
+  ('\u{1003e}', '\u{1003e}'), ('\u{1004e}', '\u{1004f}'),
+  ('\u{1005e}', '\u{1007f}'), ('\u{100fb}', '\u{100ff}'),
+  ('\u{10103}', '\u{10106}'), ('\u{10134}', '\u{10136}'),
+  ('\u{1018f}', '\u{1018f}'), ('\u{1019c}', '\u{1019f}'),
+  ('\u{101a1}', '\u{101cf}'), ('\u{101fe}', '\u{1027f}'),
+  ('\u{1029d}', '\u{1029f}'), ('\u{102d1}', '\u{102df}'),
+  ('\u{102fc}', '\u{102ff}'), ('\u{10324}', '\u{1032c}'),
+  ('\u{1034b}', '\u{1034f}'), ('\u{1037b}', '\u{1037f}'),
+  ('\u{1039e}', '\u{1039e}'), ('\u{103c4}', '\u{103c7}'),
+  ('\u{103d6}', '\u{103ff}'), ('\u{1049e}', '\u{1049f}'),
+  ('\u{104aa}', '\u{104af}'), ('\u{104d4}', '\u{104d7}'),
+  ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'),
+  ('\u{10564}', '\u{1056e}'), ('\u{10570}', '\u{105ff}'),
+  ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'),
+  ('\u{10768}', '\u{107ff}'), ('\u{10806}', '\u{10807}'),
+  ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'),
+  ('\u{10839}', '\u{1083b}'), ('\u{1083d}', '\u{1083e}'),
+  ('\u{10856}', '\u{10856}'), ('\u{1089f}', '\u{108a6}'),
+  ('\u{108b0}', '\u{108df}'), ('\u{108f3}', '\u{108f3}'),
+  ('\u{108f6}', '\u{108fa}'), ('\u{1091c}', '\u{1091e}'),
+  ('\u{1093a}', '\u{1093e}'), ('\u{10940}', '\u{1097f}'),
+  ('\u{109b8}', '\u{109bb}'), ('\u{109d0}', '\u{109d1}'),
+  ('\u{10a04}', '\u{10a04}'), ('\u{10a07}', '\u{10a0b}'),
+  ('\u{10a14}', '\u{10a14}'), ('\u{10a18}', '\u{10a18}'),
+  ('\u{10a34}', '\u{10a37}'), ('\u{10a3b}', '\u{10a3e}'),
+  ('\u{10a48}', '\u{10a4f}'), ('\u{10a59}', '\u{10a5f}'),
+  ('\u{10aa0}', '\u{10abf}'), ('\u{10ae7}', '\u{10aea}'),
+  ('\u{10af7}', '\u{10aff}'), ('\u{10b36}', '\u{10b38}'),
+  ('\u{10b56}', '\u{10b57}'), ('\u{10b73}', '\u{10b77}'),
+  ('\u{10b92}', '\u{10b98}'), ('\u{10b9d}', '\u{10ba8}'),
+  ('\u{10bb0}', '\u{10bff}'), ('\u{10c49}', '\u{10c7f}'),
+  ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'),
+  ('\u{10d00}', '\u{10e5f}'), ('\u{10e7f}', '\u{10fff}'),
+  ('\u{1104e}', '\u{11051}'), ('\u{11070}', '\u{1107e}'),
+  ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'),
+  ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'),
+  ('\u{11144}', '\u{1114f}'), ('\u{11177}', '\u{1117f}'),
+  ('\u{111ce}', '\u{111cf}'), ('\u{111e0}', '\u{111e0}'),
+  ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'),
+  ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'),
+  ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'),
+  ('\u{1129e}', '\u{1129e}'), ('\u{112aa}', '\u{112af}'),
+  ('\u{112eb}', '\u{112ef}'), ('\u{112fa}', '\u{112ff}'),
+  ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'),
+  ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'),
+  ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'),
+  ('\u{1133a}', '\u{1133b}'), ('\u{11345}', '\u{11346}'),
+  ('\u{11349}', '\u{1134a}'), ('\u{1134e}', '\u{1134f}'),
+  ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'),
+  ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'),
+  ('\u{11375}', '\u{113ff}'), ('\u{1145a}', '\u{1145a}'),
+  ('\u{1145c}', '\u{1145c}'), ('\u{1145e}', '\u{1147f}'),
+  ('\u{114c8}', '\u{114cf}'), ('\u{114da}', '\u{1157f}'),
+  ('\u{115b6}', '\u{115b7}'), ('\u{115de}', '\u{115ff}'),
+  ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'),
+  ('\u{1166d}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'),
+  ('\u{116ca}', '\u{116ff}'), ('\u{1171a}', '\u{1171c}'),
+  ('\u{1172c}', '\u{1172f}'), ('\u{11740}', '\u{1189f}'),
+  ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{119ff}'),
+  ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'),
+  ('\u{11a9d}', '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'),
+  ('\u{11af9}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'),
+  ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'),
+  ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}', '\u{11c91}'),
+  ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'),
+  ('\u{11d07}', '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'),
+  ('\u{11d37}', '\u{11d39}'), ('\u{11d3b}', '\u{11d3b}'),
+  ('\u{11d3e}', '\u{11d3e}'), ('\u{11d48}', '\u{11d4f}'),
+  ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
+  ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'),
+  ('\u{12544}', '\u{12fff}'), ('\u{1342f}', '\u{143ff}'),
+  ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'),
+  ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'),
+  ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
+  ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'),
+  ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'),
+  ('\u{16b78}', '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'),
+  ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}', '\u{16f8e}'),
+  ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}', '\u{16fff}'),
+  ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'),
+  ('\u{1b11f}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'),
+  ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'),
+  ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'),
+  ('\u{1bca4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
+  ('\u{1d127}', '\u{1d128}'), ('\u{1d1e9}', '\u{1d1ff}'),
+  ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'),
+  ('\u{1d372}', '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'),
+  ('\u{1d49d}', '\u{1d49d}'), ('\u{1d4a0}', '\u{1d4a1}'),
+  ('\u{1d4a3}', '\u{1d4a4}'), ('\u{1d4a7}', '\u{1d4a8}'),
+  ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}', '\u{1d4ba}'),
+  ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'),
+  ('\u{1d506}', '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'),
+  ('\u{1d515}', '\u{1d515}'), ('\u{1d51d}', '\u{1d51d}'),
+  ('\u{1d53a}', '\u{1d53a}'), ('\u{1d53f}', '\u{1d53f}'),
+  ('\u{1d545}', '\u{1d545}'), ('\u{1d547}', '\u{1d549}'),
+  ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'),
+  ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'),
+  ('\u{1daa0}', '\u{1daa0}'), ('\u{1dab0}', '\u{1dfff}'),
+  ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'),
+  ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'),
+  ('\u{1e02b}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'),
+  ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94b}', '\u{1e94f}'),
+  ('\u{1e95a}', '\u{1e95d}'), ('\u{1e960}', '\u{1edff}'),
+  ('\u{1ee04}', '\u{1ee04}'), ('\u{1ee20}', '\u{1ee20}'),
+  ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}', '\u{1ee26}'),
+  ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'),
+  ('\u{1ee38}', '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'),
+  ('\u{1ee3c}', '\u{1ee41}'), ('\u{1ee43}', '\u{1ee46}'),
+  ('\u{1ee48}', '\u{1ee48}'), ('\u{1ee4a}', '\u{1ee4a}'),
+  ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}', '\u{1ee50}'),
+  ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'),
+  ('\u{1ee58}', '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'),
+  ('\u{1ee5c}', '\u{1ee5c}'), ('\u{1ee5e}', '\u{1ee5e}'),
+  ('\u{1ee60}', '\u{1ee60}'), ('\u{1ee63}', '\u{1ee63}'),
+  ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}', '\u{1ee6b}'),
+  ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'),
+  ('\u{1ee7d}', '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'),
+  ('\u{1ee8a}', '\u{1ee8a}'), ('\u{1ee9c}', '\u{1eea0}'),
+  ('\u{1eea4}', '\u{1eea4}'), ('\u{1eeaa}', '\u{1eeaa}'),
+  ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}', '\u{1efff}'),
+  ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'),
+  ('\u{1f0af}', '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'),
+  ('\u{1f0d0}', '\u{1f0d0}'), ('\u{1f0f6}', '\u{1f0ff}'),
+  ('\u{1f10d}', '\u{1f10f}'), ('\u{1f12f}', '\u{1f12f}'),
+  ('\u{1f16c}', '\u{1f16f}'), ('\u{1f1ad}', '\u{1f1e5}'),
+  ('\u{1f203}', '\u{1f20f}'), ('\u{1f23c}', '\u{1f23f}'),
+  ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'),
+  ('\u{1f266}', '\u{1f2ff}'), ('\u{1f6d5}', '\u{1f6df}'),
+  ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6f9}', '\u{1f6ff}'),
+  ('\u{1f774}', '\u{1f77f}'), ('\u{1f7d5}', '\u{1f7ff}'),
+  ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'),
+  ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'),
+  ('\u{1f8ae}', '\u{1f8ff}'), ('\u{1f90c}', '\u{1f90f}'),
+  ('\u{1f93f}', '\u{1f93f}'), ('\u{1f94d}', '\u{1f94f}'),
+  ('\u{1f96c}', '\u{1f97f}'), ('\u{1f998}', '\u{1f9bf}'),
+  ('\u{1f9c1}', '\u{1f9cf}'), ('\u{1f9e7}', '\u{1ffff}'),
+  ('\u{2a6d7}', '\u{2a6ff}'), ('\u{2b735}', '\u{2b73f}'),
+  ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'),
+  ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{e0000}'),
+  ('\u{e0002}', '\u{e001f}'), ('\u{e0080}', '\u{e00ff}'),
+  ('\u{e01f0}', '\u{effff}'), ('\u{ffffe}', '\u{fffff}'),
+  ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const UPPERCASE_LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('Ĳ', 'Ĳ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('Ǆ', 'Ǆ'),
+  ('Ǉ', 'Ǉ'), ('Ǌ', 'Ǌ'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('Ǳ', 'Ǳ'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('ϒ', 'ϔ'), ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'),
+  ('Ϡ', 'Ϡ'), ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'),
+  ('Ϫ', 'Ϫ'), ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('Ᾰ', 'Ά'),
+  ('Ὲ', 'Ή'), ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'Ώ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℋ', 'ℍ'), ('ℐ', 'ℒ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℰ', 'ℳ'), ('ℾ', 'ℿ'),
+  ('ⅅ', 'ⅅ'), ('Ↄ', 'Ↄ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'),
+  ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'),
+  ('Ɑ', 'Ɒ'), ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'),
+  ('Ⲃ', 'Ⲃ'), ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'),
+  ('Ⲋ', 'Ⲋ'), ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'),
+  ('Ⲓ', 'Ⲓ'), ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'),
+  ('Ⲛ', 'Ⲛ'), ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'),
+  ('Ⲣ', 'Ⲣ'), ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'),
+  ('Ⲫ', 'Ⲫ'), ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'),
+  ('Ⲳ', 'Ⲳ'), ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'),
+  ('Ⲻ', 'Ⲻ'), ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'),
+  ('Ⳃ', 'Ⳃ'), ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'),
+  ('Ⳋ', 'Ⳋ'), ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'),
+  ('Ⳓ', 'Ⳓ'), ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'),
+  ('Ⳛ', 'Ⳛ'), ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'),
+  ('Ⳣ', 'Ⳣ'), ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'),
+  ('Ꙁ', 'Ꙁ'), ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'),
+  ('Ꙉ', 'Ꙉ'), ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'),
+  ('Ꙑ', 'Ꙑ'), ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'),
+  ('Ꙙ', 'Ꙙ'), ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'),
+  ('Ꙡ', 'Ꙡ'), ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'),
+  ('Ꙩ', 'Ꙩ'), ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'),
+  ('Ꚃ', 'Ꚃ'), ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'),
+  ('Ꚋ', 'Ꚋ'), ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'),
+  ('Ꚓ', 'Ꚓ'), ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'),
+  ('Ꚛ', 'Ꚛ'), ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'),
+  ('Ꜩ', 'Ꜩ'), ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'),
+  ('Ꜳ', 'Ꜳ'), ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'),
+  ('Ꜻ', 'Ꜻ'), ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'),
+  ('Ꝃ', 'Ꝃ'), ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'),
+  ('Ꝋ', 'Ꝋ'), ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'),
+  ('Ꝓ', 'Ꝓ'), ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'),
+  ('Ꝛ', 'Ꝛ'), ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'),
+  ('Ꝣ', 'Ꝣ'), ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'),
+  ('Ꝫ', 'Ꝫ'), ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'),
+  ('Ꝼ', 'Ꝼ'), ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'),
+  ('Ꞅ', 'Ꞅ'), ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'),
+  ('Ꞑ', 'Ꞑ'), ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'),
+  ('Ꞛ', 'Ꞛ'), ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'),
+  ('Ꞣ', 'Ꞣ'), ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'),
+  ('Ɦ', 'Ɪ'), ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('Ａ', 'Ｚ'),
+  ('𐐀', '𐐧'), ('𐒰', '𐓓'), ('𐲀', '𐲲'), ('𑢠', '𑢿'),
+  ('𝐀', '𝐙'), ('𝐴', '𝑍'), ('𝑨', '𝒁'), ('𝒜', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒵'), ('𝓐', '𝓩'), ('𝔄', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔸', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕬', '𝖅'),
+  ('𝖠', '𝖹'), ('𝗔', '𝗭'), ('𝘈', '𝘡'), ('𝘼', '𝙕'),
+  ('𝙰', '𝚉'), ('𝚨', '𝛀'), ('𝛢', '𝛺'), ('𝜜', '𝜴'),
+  ('𝝖', '𝝮'), ('𝞐', '𝞨'), ('𝟊', '𝟊'), ('𞤀', '𞤡'),
+];
diff --git a/regex-syntax/src/unicode_tables/mod.rs b/regex-syntax/src/unicode_tables/mod.rs
new file mode 100644
index 0000000000..6c2e9e7736
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/mod.rs
@@ -0,0 +1,9 @@
+pub mod age;
+pub mod case_folding_simple;
+pub mod general_category;
+pub mod perl_word;
+pub mod property_bool;
+pub mod property_names;
+pub mod property_values;
+pub mod script_extension;
+pub mod script;
diff --git a/regex-syntax/src/unicode_tables/perl_word.rs b/regex-syntax/src/unicode_tables/perl_word.rs
new file mode 100644
index 0000000000..d33f79a02b
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/perl_word.rs
@@ -0,0 +1,179 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate perl-word /home/andrew/tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const PERL_WORD: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'),
+  ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'), ('Ͷ', 'ͷ'),
+  ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', 'ԯ'), ('Ա', 'Ֆ'),
+  ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'), ('ؐ', 'ؚ'),
+  ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'), ('۪', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'), ('ߺ', 'ߺ'),
+  ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
+  ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'), ('०', '९'),
+  ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'), ('ਁ', 'ਃ'),
+  ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'),
+  ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('਼', '਼'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'),
+  ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'), ('ઁ', 'ઃ'),
+  ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'), ('૦', '૯'),
+  ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
+  ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
+  ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'),
+  ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'), ('ୱ', 'ୱ'),
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('௦', '௯'),
+  ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'),
+  ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', '్'),
+  ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('౦', '౯'),
+  ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'),
+  ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'), ('ೠ', 'ೣ'),
+  ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'), ('അ', 'ഌ'),
+  ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', 'ൎ'),
+  ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'), ('ൺ', 'ൿ'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'), ('ෲ', 'ෳ'),
+  ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'), ('ກ', 'ຂ'),
+  ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'),
+  ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'),
+  ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'), ('ົ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'), ('໐', '໙'),
+  ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'), ('༠', '༩'),
+  ('༵', '༵'), ('༷', '༷'), ('༹', '༹'), ('༾', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'), ('ྙ', 'ྼ'),
+  ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፝', '፟'),
+  ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'),
+  ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'), ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'),
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'), ('ក', '៓'),
+  ('ៗ', 'ៗ'), ('ៜ', '៝'), ('០', '៩'), ('᠋', '᠍'),
+  ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('᥆', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧙'),
+  ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'),
+  ('᪐', '᪙'), ('ᪧ', 'ᪧ'), ('᪰', '᪾'), ('ᬀ', 'ᭋ'),
+  ('᭐', '᭙'), ('᭫', '᭳'), ('ᮀ', '᯳'), ('ᰀ', '᰷'),
+  ('᱀', '᱉'), ('ᱍ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('᳐', '᳒'),
+  ('᳔', '᳹'), ('ᴀ', '᷹'), ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃐', '⃰'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'),
+  ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'),
+  ('K', 'ℭ'), ('ℯ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'),
+  ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'),
+  ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'),
+  ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'),
+  ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'),
+  ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('ⸯ', 'ⸯ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', '゚'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'),
+  ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'),
+  ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), ('Ꙁ', '꙲'), ('ꙴ', '꙽'),
+  ('ꙿ', '꛱'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'),
+  ('꣐', '꣙'), ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('꤀', '꤭'), ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'),
+  ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'),
+  ('꩐', '꩙'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫯ'), ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'),
+  ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'),
+  ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('︀', '️'), ('︠', '︯'),
+  ('︳', '︴'), ('﹍', '﹏'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('０', '９'), ('Ａ', 'Ｚ'), ('＿', '＿'), ('ａ', 'ｚ'),
+  ('ｦ', 'ﾾ'), ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'),
+  ('ￚ', 'ￜ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'),
+  ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
+  ('𐅀', '𐅴'), ('𐇽', '𐇽'), ('𐊀', '𐊜'), ('𐊠', '𐋐'),
+  ('𐋠', '𐋠'), ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍺'),
+  ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐩠', '𐩼'), ('𐪀', '𐪜'),
+  ('𐫀', '𐫇'), ('𐫉', '𐫦'), ('𐬀', '𐬵'), ('𐭀', '𐭕'),
+  ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑀀', '𑁆'), ('𑁦', '𑁯'), ('𑁿', '𑂺'),
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑄿'),
+  ('𑅐', '𑅳'), ('𑅶', '𑅶'), ('𑆀', '𑇄'), ('𑇊', '𑇌'),
+  ('𑇐', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈷'),
+  ('𑈾', '𑈾'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+  ('𑌀', '𑌃'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'),
+  ('𑍝', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐀', '𑑊'),
+  ('𑑐', '𑑙'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'),
+  ('𑖀', '𑖵'), ('𑖸', '𑗀'), ('𑗘', '𑗝'), ('𑘀', '𑙀'),
+  ('𑙄', '𑙄'), ('𑙐', '𑙙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜹'), ('𑢠', '𑣩'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨾'), ('𑩇', '𑩇'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪙'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'),
+  ('𑰸', '𑱀'), ('𑱐', '𑱙'), ('𑱲', '𑲏'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'),
+  ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'),
+  ('𖫐', '𖫭'), ('𖫰', '𖫴'), ('𖬀', '𖬶'), ('𖭀', '𖭃'),
+  ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𖼀', '𖽄'),
+  ('𖽐', '𖽾'), ('𖾏', '𖾟'), ('𖿠', '𖿡'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲝', '𛲞'),
+  ('𝅥', '𝅩'), ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'),
+  ('𝆪', '𝆭'), ('𝉂', '𝉄'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𝟎', '𝟿'), ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'),
+  ('𝪄', '𝪄'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞠀', '𞣄'), ('𞣐', '𞣖'), ('𞤀', '𞥊'), ('𞥐', '𞥙'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
diff --git a/regex-syntax/src/unicode_tables/property_bool.rs b/regex-syntax/src/unicode_tables/property_bool.rs
new file mode 100644
index 0000000000..ae867e3007
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/property_bool.rs
@@ -0,0 +1,2576 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-bool tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), ("Alphabetic", ALPHABETIC),
+  ("Bidi_Control", BIDI_CONTROL), ("Case_Ignorable", CASE_IGNORABLE),
+  ("Cased", CASED), ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED),
+  ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED),
+  ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED),
+  ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED),
+  ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED), ("Dash", DASH),
+  ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT),
+  ("Deprecated", DEPRECATED), ("Diacritic", DIACRITIC),
+  ("Extender", EXTENDER), ("Grapheme_Base", GRAPHEME_BASE),
+  ("Grapheme_Extend", GRAPHEME_EXTEND), ("Grapheme_Link", GRAPHEME_LINK),
+  ("Hex_Digit", HEX_DIGIT), ("Hyphen", HYPHEN),
+  ("IDS_Binary_Operator", IDS_BINARY_OPERATOR),
+  ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR),
+  ("ID_Continue", ID_CONTINUE), ("ID_Start", ID_START),
+  ("Ideographic", IDEOGRAPHIC), ("Join_Control", JOIN_CONTROL),
+  ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION),
+  ("Lowercase", LOWERCASE), ("Math", MATH),
+  ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT),
+  ("Other_Alphabetic", OTHER_ALPHABETIC),
+  ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT),
+  ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND),
+  ("Other_ID_Continue", OTHER_ID_CONTINUE),
+  ("Other_ID_Start", OTHER_ID_START), ("Other_Lowercase", OTHER_LOWERCASE),
+  ("Other_Math", OTHER_MATH), ("Other_Uppercase", OTHER_UPPERCASE),
+  ("Pattern_Syntax", PATTERN_SYNTAX),
+  ("Pattern_White_Space", PATTERN_WHITE_SPACE),
+  ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK),
+  ("Quotation_Mark", QUOTATION_MARK), ("Radical", RADICAL),
+  ("Regional_Indicator", REGIONAL_INDICATOR),
+  ("Sentence_Terminal", SENTENCE_TERMINAL), ("Soft_Dotted", SOFT_DOTTED),
+  ("Terminal_Punctuation", TERMINAL_PUNCTUATION),
+  ("Unified_Ideograph", UNIFIED_IDEOGRAPH), ("Uppercase", UPPERCASE),
+  ("Variation_Selector", VARIATION_SELECTOR), ("White_Space", WHITE_SPACE),
+  ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START),
+];
+
+pub const ASCII_HEX_DIGIT: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'F'), ('a', 'f'),
+];
+
+pub const ALPHABETIC: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('ͅ', 'ͅ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'),
+  ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'),
+  ('ՙ', 'ՙ'), ('ա', 'և'), ('ְ', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'), ('ؐ', 'ؚ'),
+  ('ؠ', 'ٗ'), ('ٙ', 'ٟ'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('ۡ', 'ۨ'),
+  ('ۭ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', 'ܿ'), ('ݍ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠗ'), ('ࠚ', 'ࠬ'),
+  ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'),
+  ('ࣔ', 'ࣟ'), ('ࣣ', 'ࣩ'), ('ࣰ', 'ऻ'), ('ऽ', 'ौ'),
+  ('ॎ', 'ॐ'), ('ॕ', 'ॣ'), ('ॱ', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৌ'),
+  ('ৎ', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'), ('য়', 'ৣ'),
+  ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਁ', 'ਃ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', 'ੌ'), ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੰ', 'ੵ'), ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'),
+  ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'),
+  ('ઽ', 'ૅ'), ('ે', 'ૉ'), ('ો', 'ૌ'), ('ૐ', 'ૐ'),
+  ('ૠ', 'ૣ'), ('ૹ', 'ૼ'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ୄ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('ୱ', 'ୱ'),
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', 'ௌ'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', 'ౌ'), ('ౕ', 'ౖ'),
+  ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಽ', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', 'ೌ'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೣ'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ൄ'),
+  ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൎ', 'ൎ'), ('ൔ', 'ൗ'),
+  ('ൟ', 'ൣ'), ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'),
+  ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('ෲ', 'ෳ'),
+  ('ก', 'ฺ'), ('เ', 'ๆ'), ('ํ', 'ํ'), ('ກ', 'ຂ'),
+  ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'),
+  ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'),
+  ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'), ('ົ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('ໍ', 'ໍ'), ('ໜ', 'ໟ'),
+  ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ཱྀ'),
+  ('ྈ', 'ྗ'), ('ྙ', 'ྼ'), ('က', 'ံ'), ('း', 'း'),
+  ('ျ', 'ဿ'), ('ၐ', 'ၢ'), ('ၥ', 'ၨ'), ('ၮ', 'ႆ'),
+  ('ႎ', 'ႎ'), ('ႜ', 'ႝ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፟', '፟'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜓ'), ('ᜠ', 'ᜳ'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'), ('ក', 'ឳ'), ('ា', 'ៈ'),
+  ('ៗ', 'ៗ'), ('ៜ', 'ៜ'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', 'ᤸ'),
+  ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'),
+  ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'), ('ᩡ', 'ᩴ'), ('ᪧ', 'ᪧ'),
+  ('ᬀ', 'ᬳ'), ('ᬵ', 'ᭃ'), ('ᭅ', 'ᭋ'), ('ᮀ', 'ᮩ'),
+  ('ᮬ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᯧ', 'ᯱ'), ('ᰀ', 'ᰵ'),
+  ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'),
+  ('ᳮ', 'ᳳ'), ('ᳵ', 'ᳶ'), ('ᴀ', 'ᶿ'), ('ᷧ', 'ᷴ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'),
+  ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'),
+  ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'),
+  ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'),
+  ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'),
+  ('ⷠ', 'ⷿ'), ('ⸯ', 'ⸯ'), ('々', '〇'), ('〡', '〩'),
+  ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'), ('ゝ', 'ゟ'),
+  ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'),
+  ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'),
+  ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'),
+  ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙴ', 'ꙻ'), ('ꙿ', 'ꛯ'),
+  ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠧ'),
+  ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣃ'), ('ꣅ', 'ꣅ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤪ'), ('ꤰ', 'ꥒ'),
+  ('ꥠ', 'ꥼ'), ('ꦀ', 'ꦲ'), ('ꦴ', 'ꦿ'), ('ꧏ', 'ꧏ'),
+  ('ꧠ', 'ꧤ'), ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨶ'),
+  ('ꩀ', 'ꩍ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪾ'),
+  ('ꫀ', 'ꫀ'), ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫯ'),
+  ('ꫲ', 'ꫵ'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'),
+  ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꯪ'), ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'),
+  ('豈', '舘'), ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'),
+  ('יִ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('Ａ', 'Ｚ'), ('ａ', 'ｚ'), ('ｦ', 'ﾾ'),
+  ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍺'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀀', '𑁅'),
+  ('𑂂', '𑂸'), ('𑃐', '𑃨'), ('𑄀', '𑄲'), ('𑅐', '𑅲'),
+  ('𑅶', '𑅶'), ('𑆀', '𑆿'), ('𑇁', '𑇄'), ('𑇚', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈴'), ('𑈷', '𑈷'),
+  ('𑈾', '𑈾'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋨'), ('𑌀', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍌'), ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'),
+  ('𑐀', '𑑁'), ('𑑃', '𑑅'), ('𑑇', '𑑊'), ('𑒀', '𑓁'),
+  ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖵'), ('𑖸', '𑖾'),
+  ('𑗘', '𑗝'), ('𑘀', '𑘾'), ('𑙀', '𑙀'), ('𑙄', '𑙄'),
+  ('𑚀', '𑚵'), ('𑜀', '𑜙'), ('𑜝', '𑜪'), ('𑢠', '𑣟'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨲'), ('𑨵', '𑨾'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪗'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'),
+  ('𑰸', '𑰾'), ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'),
+  ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵁'), ('𑵃', '𑵃'),
+  ('𑵆', '𑵇'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲞', '𛲞'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'),
+  ('𞀦', '𞀪'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥇', '𞥇'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const BIDI_CONTROL: &'static [(char, char)] = &[
+  ('\u{61c}', '\u{61c}'), ('\u{200e}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
+  ('\u{2066}', '\u{2069}'),
+];
+
+pub const CASE_IGNORABLE: &'static [(char, char)] = &[
+  ('\'', '\''), ('.', '.'), (':', ':'), ('^', '^'), ('`', '`'), ('¨', '¨'),
+  ('\u{ad}', '\u{ad}'), ('¯', '¯'), ('´', '´'), ('·', '¸'),
+  ('ʰ', 'ͯ'), ('ʹ', '͵'), ('ͺ', 'ͺ'), ('΄', '΅'), ('·', '·'),
+  ('҃', '҉'), ('ՙ', 'ՙ'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('״', '״'), ('\u{600}', '\u{605}'),
+  ('ؐ', 'ؚ'), ('\u{61c}', '\u{61c}'), ('ـ', 'ـ'), ('ً', 'ٟ'),
+  ('ٰ', 'ٰ'), ('ۖ', '\u{6dd}'), ('۟', 'ۨ'), ('۪', 'ۭ'),
+  ('\u{70f}', '\u{70f}'), ('ܑ', 'ܑ'), ('ܰ', '݊'), ('ަ', 'ް'),
+  ('߫', 'ߵ'), ('ߺ', 'ߺ'), ('ࠖ', '࠭'), ('࡙', '࡛'), ('ࣔ', 'ं'),
+  ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'), ('्', '्'),
+  ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ॱ', 'ॱ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('ু', 'ৄ'), ('্', '্'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਂ'), ('਼', '਼'), ('ੁ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ં'), ('઼', '઼'), ('ુ', 'ૅ'), ('ે', 'ૈ'),
+  ('્', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଁ'),
+  ('଼', '଼'), ('ି', 'ି'), ('ୁ', 'ୄ'), ('୍', '୍'),
+  ('ୖ', 'ୖ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ீ', 'ீ'),
+  ('்', '்'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೆ', 'ೆ'), ('ೌ', '್'),
+  ('ೢ', 'ೣ'), ('ഀ', 'ഁ'), ('഻', '഼'), ('ു', 'ൄ'),
+  ('്', '്'), ('ൢ', 'ൣ'), ('්', '්'), ('ි', 'ු'),
+  ('ූ', 'ූ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('ๆ', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('ໆ', 'ໆ'),
+  ('່', 'ໍ'), ('༘', '༙'), ('༵', '༵'), ('༷', '༷'),
+  ('༹', '༹'), ('ཱ', 'ཾ'), ('ྀ', '྄'), ('྆', '྇'),
+  ('ྍ', 'ྗ'), ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ိ', 'ူ'),
+  ('ဲ', '့'), ('္', '်'), ('ွ', 'ှ'), ('ၘ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၱ', 'ၴ'), ('ႂ', 'ႂ'), ('ႅ', 'ႆ'),
+  ('ႍ', 'ႍ'), ('ႝ', 'ႝ'), ('ჼ', 'ჼ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '឵'), ('ិ', 'ួ'), ('ំ', 'ំ'), ('៉', '៓'),
+  ('ៗ', 'ៗ'), ('៝', '៝'), ('᠋', '\u{180e}'), ('ᡃ', 'ᡃ'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'),
+  ('ᤲ', 'ᤲ'), ('᤹', '᤻'), ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'),
+  ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'), ('᩠', '᩠'), ('ᩢ', 'ᩢ'),
+  ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'), ('᩿', '᩿'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪾'), ('ᬀ', 'ᬃ'), ('᬴', '᬴'), ('ᬶ', 'ᬺ'),
+  ('ᬼ', 'ᬼ'), ('ᭂ', 'ᭂ'), ('᭫', '᭳'), ('ᮀ', 'ᮁ'),
+  ('ᮢ', 'ᮥ'), ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'), ('᯦', '᯦'),
+  ('ᯨ', 'ᯩ'), ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'), ('ᰬ', 'ᰳ'),
+  ('ᰶ', '᰷'), ('ᱸ', 'ᱽ'), ('᳐', '᳒'), ('᳔', '᳠'),
+  ('᳢', '᳨'), ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'),
+  ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', '᷹'), ('᷻', '᷿'),
+  ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'),
+  ('῭', '`'), ('´', '῾'), ('\u{200b}', '\u{200f}'), ('‘', '’'),
+  ('․', '․'), ('‧', '‧'), ('\u{202a}', '\u{202e}'),
+  ('\u{2060}', '\u{2064}'), ('\u{2066}', '\u{206f}'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃐', '⃰'), ('ⱼ', 'ⱽ'),
+  ('⳯', '⳱'), ('ⵯ', 'ⵯ'), ('⵿', '⵿'), ('ⷠ', 'ⷿ'),
+  ('ⸯ', 'ⸯ'), ('々', '々'), ('〪', '〭'), ('〱', '〵'),
+  ('〻', '〻'), ('゙', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'),
+  ('ꓸ', 'ꓽ'), ('ꘌ', 'ꘌ'), ('꙯', '꙲'), ('ꙴ', '꙽'),
+  ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚟ'), ('꛰', '꛱'), ('꜀', '꜡'),
+  ('ꝰ', 'ꝰ'), ('ꞈ', '꞊'), ('ꟸ', 'ꟹ'), ('ꠂ', 'ꠂ'),
+  ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠥ', 'ꠦ'), ('꣄', 'ꣅ'),
+  ('꣠', '꣱'), ('ꤦ', '꤭'), ('ꥇ', 'ꥑ'), ('ꦀ', 'ꦂ'),
+  ('꦳', '꦳'), ('ꦶ', 'ꦹ'), ('ꦼ', 'ꦼ'), ('ꧏ', 'ꧏ'),
+  ('ꧥ', 'ꧦ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'), ('ꨵ', 'ꨶ'),
+  ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩰ', 'ꩰ'), ('ꩼ', 'ꩼ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫝ', 'ꫝ'), ('ꫬ', 'ꫭ'), ('ꫳ', 'ꫴ'),
+  ('꫶', '꫶'), ('꭛', 'ꭟ'), ('ꯥ', 'ꯥ'), ('ꯨ', 'ꯨ'),
+  ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('﮲', '﯁'), ('︀', '️'),
+  ('︓', '︓'), ('︠', '︯'), ('﹒', '﹒'), ('﹕', '﹕'),
+  ('\u{feff}', '\u{feff}'), ('＇', '＇'), ('．', '．'), ('：', '：'),
+  ('＾', '＾'), ('｀', '｀'), ('ｰ', 'ｰ'), ('ﾞ', 'ﾟ'),
+  ('￣', '￣'), ('\u{fff9}', '\u{fffb}'), ('𐇽', '𐇽'),
+  ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐫥', '𐫦'),
+  ('𑀁', '𑀁'), ('𑀸', '𑁆'), ('𑁿', '𑂁'), ('𑂳', '𑂶'),
+  ('𑂹', '𑂺'), ('\u{110bd}', '\u{110bd}'), ('𑄀', '𑄂'),
+  ('𑄧', '𑄫'), ('𑄭', '𑄴'), ('𑅳', '𑅳'), ('𑆀', '𑆁'),
+  ('𑆶', '𑆾'), ('𑇊', '𑇌'), ('𑈯', '𑈱'), ('𑈴', '𑈴'),
+  ('𑈶', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋟'), ('𑋣', '𑋪'),
+  ('𑌀', '𑌁'), ('𑌼', '𑌼'), ('𑍀', '𑍀'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'),
+  ('𑒳', '𑒸'), ('𑒺', '𑒺'), ('𑒿', '𑓀'), ('𑓂', '𑓃'),
+  ('𑖲', '𑖵'), ('𑖼', '𑖽'), ('𑖿', '𑗀'), ('𑗜', '𑗝'),
+  ('𑘳', '𑘺'), ('𑘽', '𑘽'), ('𑘿', '𑙀'), ('𑚫', '𑚫'),
+  ('𑚭', '𑚭'), ('𑚰', '𑚵'), ('𑚷', '𑚷'), ('𑜝', '𑜟'),
+  ('𑜢', '𑜥'), ('𑜧', '𑜫'), ('𑨁', '𑨆'), ('𑨉', '𑨊'),
+  ('𑨳', '𑨸'), ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩖'),
+  ('𑩙', '𑩛'), ('𑪊', '𑪖'), ('𑪘', '𑪙'), ('𑰰', '𑰶'),
+  ('𑰸', '𑰽'), ('𑰿', '𑰿'), ('𑲒', '𑲧'), ('𑲪', '𑲰'),
+  ('𑲲', '𑲳'), ('𑲵', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'),
+  ('𖬰', '𖬶'), ('𖭀', '𖭃'), ('𖾏', '𖾟'), ('𖿠', '𖿡'),
+  ('𛲝', '𛲞'), ('\u{1bca0}', '\u{1bca3}'), ('𝅧', '𝅩'),
+  ('\u{1d173}', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'),
+  ('𞥄', '𞥊'), ('🏻', '🏿'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'), ('󠄀', '󠇯'),
+];
+
+pub const CASED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ƺ'), ('Ƽ', 'ƿ'), ('Ǆ', 'ʓ'),
+  ('ʕ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'),
+  ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℴ'),
+  ('ℹ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'),
+  ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('Ꙁ', 'ꙭ'),
+  ('Ꚁ', 'ꚝ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'ꞎ'), ('Ꞑ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('Ａ', 'Ｚ'),
+  ('ａ', 'ｚ'), ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑢠', '𑣟'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'),
+  ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'),
+  ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'),
+  ('𝟄', '𝟋'), ('𞤀', '𞥃'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ß'), ('Ā', 'Ā'),
+  ('Ă', 'Ă'), ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'),
+  ('Č', 'Č'), ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'),
+  ('Ė', 'Ė'), ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'),
+  ('Ġ', 'Ġ'), ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'),
+  ('Ī', 'Ī'), ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('Ĳ', 'Ĳ'),
+  ('Ĵ', 'Ĵ'), ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'),
+  ('Ŀ', 'Ŀ'), ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'),
+  ('ŉ', 'Ŋ'), ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'),
+  ('Ŕ', 'Ŕ'), ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'),
+  ('Ş', 'Ş'), ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'),
+  ('Ũ', 'Ũ'), ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'),
+  ('Ų', 'Ų'), ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'),
+  ('Ž', 'Ž'), ('ſ', 'ſ'), ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'),
+  ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'), ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'),
+  ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'), ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'),
+  ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'), ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'),
+  ('Ƽ', 'Ƽ'), ('Ǆ', 'ǅ'), ('Ǉ', 'ǈ'), ('Ǌ', 'ǋ'), ('Ǎ', 'Ǎ'),
+  ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'), ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'),
+  ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'), ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'),
+  ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'), ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'),
+  ('Ǯ', 'Ǯ'), ('Ǳ', 'ǲ'), ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'),
+  ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'), ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'),
+  ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'), ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'),
+  ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'), ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'),
+  ('Ț', 'Ț'), ('Ȝ', 'Ȝ'), ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'),
+  ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'), ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'),
+  ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'), ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'),
+  ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'), ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'),
+  ('Ɏ', 'Ɏ'), ('ͅ', 'ͅ'), ('Ͱ', 'Ͱ'), ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ώ'),
+  ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('ς', 'ς'), ('Ϗ', 'ϑ'), ('ϕ', 'ϖ'),
+  ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'),
+  ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'), ('Ϫ', 'Ϫ'),
+  ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϰ', 'ϱ'), ('ϴ', 'ϵ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('և', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('Ḁ', 'Ḁ'),
+  ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'), ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'),
+  ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'), ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'),
+  ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'), ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'),
+  ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'), ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'),
+  ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'), ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'),
+  ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'), ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'),
+  ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'), ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'),
+  ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'), ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'),
+  ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'), ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'),
+  ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'), ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'),
+  ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'), ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'),
+  ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'), ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'),
+  ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'), ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'),
+  ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'), ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'),
+  ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'), ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'),
+  ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'), ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'),
+  ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'), ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'),
+  ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'), ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'),
+  ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'), ('ẚ', 'ẛ'), ('ẞ', 'ẞ'),
+  ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'), ('Ầ', 'Ầ'),
+  ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'), ('Ắ', 'Ắ'),
+  ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'), ('Ặ', 'Ặ'),
+  ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'), ('Ế', 'Ế'),
+  ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'), ('Ệ', 'Ệ'),
+  ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'), ('Ỏ', 'Ỏ'),
+  ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'), ('Ỗ', 'Ỗ'),
+  ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'), ('Ở', 'Ở'),
+  ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'), ('Ủ', 'Ủ'),
+  ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'), ('Ữ', 'Ữ'),
+  ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'), ('Ỷ', 'Ỷ'),
+  ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'), ('Ỿ', 'Ỿ'),
+  ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'), ('Ἰ', 'Ἷ'),
+  ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('ᾀ', 'ᾯ'), ('ᾲ', 'ᾴ'),
+  ('ᾷ', 'ᾼ'), ('ῂ', 'ῄ'), ('ῇ', 'ῌ'), ('Ῐ', 'Ί'),
+  ('Ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῷ', 'ῼ'), ('Ω', 'Ω'),
+  ('K', 'Å'), ('Ⅎ', 'Ⅎ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'),
+  ('Ⓐ', 'Ⓩ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'),
+  ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'),
+  ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'),
+  ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'),
+  ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'),
+  ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'),
+  ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'),
+  ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'),
+  ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'),
+  ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'),
+  ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'),
+  ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'),
+  ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'),
+  ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'),
+  ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'),
+  ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'),
+  ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'),
+  ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'),
+  ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'),
+  ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'),
+  ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'),
+  ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'),
+  ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'),
+  ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'),
+  ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'),
+  ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'),
+  ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'),
+  ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'),
+  ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'),
+  ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'),
+  ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'),
+  ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'),
+  ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'),
+  ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'),
+  ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'),
+  ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'),
+  ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'),
+  ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'),
+  ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'),
+  ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'),
+  ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'),
+  ('ﬓ', 'ﬗ'), ('Ａ', 'Ｚ'), ('𐐀', '𐐧'), ('𐒰', '𐓓'),
+  ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ö'),
+  ('ø', 'ķ'), ('Ĺ', 'ƌ'), ('Ǝ', 'ƚ'), ('Ɯ', 'Ʃ'), ('Ƭ', 'ƹ'),
+  ('Ƽ', 'ƽ'), ('ƿ', 'ƿ'), ('Ǆ', 'Ƞ'), ('Ȣ', 'ȳ'), ('Ⱥ', 'ɔ'),
+  ('ɖ', 'ɗ'), ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'),
+  ('ɥ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'),
+  ('ɽ', 'ɽ'), ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'),
+  ('ʝ', 'ʞ'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'), ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϑ'), ('ϕ', 'ϵ'), ('Ϸ', 'ϻ'), ('Ͻ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'),
+  ('ᵽ', 'ᵽ'), ('Ḁ', 'ẛ'), ('ẞ', 'ẞ'), ('Ạ', 'ἕ'),
+  ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'),
+  ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'),
+  ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'),
+  ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'),
+  ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('Ω', 'Ω'), ('K', 'Å'),
+  ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'),
+  ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'Ɒ'),
+  ('Ⱳ', 'ⱳ'), ('Ⱶ', 'ⱶ'), ('Ȿ', 'ⳣ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('Ꙁ', 'ꙭ'), ('Ꚁ', 'ꚛ'), ('Ꜣ', 'ꜯ'), ('Ꜳ', 'ꝯ'),
+  ('Ꝺ', 'ꞇ'), ('Ꞌ', 'Ɥ'), ('Ꞑ', 'ꞓ'), ('Ꞗ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'),
+  ('ﬓ', 'ﬗ'), ('Ａ', 'Ｚ'), ('ａ', 'ｚ'), ('𐐀', '𐑏'),
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐲀', '𐲲'), ('𐳀', '𐳲'),
+  ('𑢠', '𑣟'), ('𞤀', '𞥃'),
+];
+
+pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('Ĳ', 'Ĳ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('Ǆ', 'ǅ'),
+  ('Ǉ', 'ǈ'), ('Ǌ', 'ǋ'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('Ǳ', 'ǲ'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'),
+  ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'), ('Ϫ', 'Ϫ'),
+  ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'), ('Ϲ', 'Ϻ'),
+  ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'), ('Ѧ', 'Ѧ'),
+  ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'), ('Ѱ', 'Ѱ'),
+  ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'), ('Ѻ', 'Ѻ'),
+  ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'), ('Ҍ', 'Ҍ'),
+  ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'), ('Җ', 'Җ'),
+  ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'), ('Ҡ', 'Ҡ'),
+  ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'), ('Ҫ', 'Ҫ'),
+  ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'), ('Ҵ', 'Ҵ'),
+  ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'), ('Ҿ', 'Ҿ'),
+  ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'), ('Ӊ', 'Ӊ'),
+  ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'), ('Ӕ', 'Ӕ'),
+  ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'), ('Ӟ', 'Ӟ'),
+  ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'), ('Ө', 'Ө'),
+  ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'), ('Ӳ', 'Ӳ'),
+  ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'), ('Ӽ', 'Ӽ'),
+  ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'), ('Ԇ', 'Ԇ'),
+  ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'), ('Ԑ', 'Ԑ'),
+  ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'), ('Ԛ', 'Ԛ'),
+  ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'), ('Ԥ', 'Ԥ'),
+  ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'), ('Ԯ', 'Ԯ'),
+  ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('ᾈ', 'ᾏ'),
+  ('ᾘ', 'ᾟ'), ('ᾨ', 'ᾯ'), ('Ᾰ', 'ᾼ'), ('Ὲ', 'ῌ'),
+  ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'ῼ'), ('Ω', 'Ω'),
+  ('K', 'Å'), ('Ⅎ', 'Ⅎ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'),
+  ('Ⓐ', 'Ⓩ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'),
+  ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'),
+  ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'),
+  ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'),
+  ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'),
+  ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'),
+  ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'),
+  ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'),
+  ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'),
+  ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'),
+  ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'),
+  ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'),
+  ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'),
+  ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'),
+  ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'),
+  ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'),
+  ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'),
+  ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'),
+  ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'),
+  ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'),
+  ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'),
+  ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'),
+  ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'),
+  ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'),
+  ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'),
+  ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'),
+  ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'),
+  ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'),
+  ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'),
+  ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'),
+  ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'),
+  ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'),
+  ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'),
+  ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'),
+  ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'),
+  ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'),
+  ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'),
+  ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'),
+  ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'),
+  ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'),
+  ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('Ａ', 'Ｚ'), ('𐐀', '𐐧'),
+  ('𐒰', '𐓓'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ĳ', 'ĳ'),
+  ('ĵ', 'ĵ'), ('ķ', 'ķ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ŉ'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƌ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƚ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƭ', 'ƭ'), ('ư', 'ư'),
+  ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƹ'), ('ƽ', 'ƽ'), ('ƿ', 'ƿ'),
+  ('Ǆ', 'Ǆ'), ('ǆ', 'Ǉ'), ('ǉ', 'Ǌ'), ('ǌ', 'ǌ'), ('ǎ', 'ǎ'),
+  ('ǐ', 'ǐ'), ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'),
+  ('ǚ', 'ǚ'), ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'),
+  ('ǥ', 'ǥ'), ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'),
+  ('ǯ', 'Ǳ'), ('ǳ', 'ǳ'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'),
+  ('ǽ', 'ǽ'), ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'),
+  ('ȇ', 'ȇ'), ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'),
+  ('ȑ', 'ȑ'), ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'),
+  ('ț', 'ț'), ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'),
+  ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'),
+  ('ȱ', 'ȱ'), ('ȳ', 'ȳ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'),
+  ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ɔ'),
+  ('ɖ', 'ɗ'), ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'),
+  ('ɥ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'),
+  ('ɽ', 'ɽ'), ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'),
+  ('ʝ', 'ʞ'), ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'),
+  ('ͻ', 'ͽ'), ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'),
+  ('ϙ', 'ϙ'), ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'),
+  ('ϣ', 'ϣ'), ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'),
+  ('ϭ', 'ϭ'), ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϻ'),
+  ('а', 'џ'), ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'),
+  ('ѩ', 'ѩ'), ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'),
+  ('ѳ', 'ѳ'), ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'),
+  ('ѽ', 'ѽ'), ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'),
+  ('ҏ', 'ҏ'), ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'),
+  ('ҙ', 'ҙ'), ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'),
+  ('ң', 'ң'), ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'),
+  ('ҭ', 'ҭ'), ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'),
+  ('ҷ', 'ҷ'), ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'),
+  ('ӂ', 'ӂ'), ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'),
+  ('ӌ', 'ӌ'), ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'),
+  ('ӗ', 'ӗ'), ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'),
+  ('ӡ', 'ӡ'), ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'),
+  ('ӫ', 'ӫ'), ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'),
+  ('ӵ', 'ӵ'), ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'),
+  ('ӿ', 'ӿ'), ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'),
+  ('ԉ', 'ԉ'), ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'),
+  ('ԓ', 'ԓ'), ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'),
+  ('ԝ', 'ԝ'), ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'),
+  ('ԧ', 'ԧ'), ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'),
+  ('ա', 'և'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'),
+  ('ᵽ', 'ᵽ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'),
+  ('ḇ', 'ḇ'), ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'),
+  ('ḏ', 'ḏ'), ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'),
+  ('ḗ', 'ḗ'), ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'),
+  ('ḟ', 'ḟ'), ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'),
+  ('ḧ', 'ḧ'), ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'),
+  ('ḯ', 'ḯ'), ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'),
+  ('ḷ', 'ḷ'), ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'),
+  ('ḿ', 'ḿ'), ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'),
+  ('ṇ', 'ṇ'), ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'),
+  ('ṏ', 'ṏ'), ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'),
+  ('ṗ', 'ṗ'), ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'),
+  ('ṟ', 'ṟ'), ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'),
+  ('ṧ', 'ṧ'), ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'),
+  ('ṯ', 'ṯ'), ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'),
+  ('ṷ', 'ṷ'), ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'),
+  ('ṿ', 'ṿ'), ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'),
+  ('ẇ', 'ẇ'), ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'),
+  ('ẏ', 'ẏ'), ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẛ'),
+  ('ạ', 'ạ'), ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'),
+  ('ẩ', 'ẩ'), ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'),
+  ('ằ', 'ằ'), ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'),
+  ('ẹ', 'ẹ'), ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'),
+  ('ề', 'ề'), ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'),
+  ('ỉ', 'ỉ'), ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'),
+  ('ố', 'ố'), ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'),
+  ('ộ', 'ộ'), ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'),
+  ('ỡ', 'ỡ'), ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'),
+  ('ứ', 'ứ'), ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'),
+  ('ự', 'ự'), ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'),
+  ('ỹ', 'ỹ'), ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'),
+  ('ἐ', 'ἕ'), ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'),
+  ('ὐ', 'ὗ'), ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾇ'),
+  ('ᾐ', 'ᾗ'), ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'), ('ᾶ', 'ᾷ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'ῗ'), ('ῠ', 'ῧ'), ('ῲ', 'ῴ'), ('ῶ', 'ῷ'),
+  ('ⅎ', 'ⅎ'), ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'),
+  ('ⰰ', 'ⱞ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'),
+  ('ⱪ', 'ⱪ'), ('ⱬ', 'ⱬ'), ('ⱳ', 'ⱳ'), ('ⱶ', 'ⱶ'),
+  ('ⲁ', 'ⲁ'), ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'),
+  ('ⲉ', 'ⲉ'), ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'),
+  ('ⲑ', 'ⲑ'), ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'),
+  ('ⲙ', 'ⲙ'), ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'),
+  ('ⲡ', 'ⲡ'), ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'),
+  ('ⲩ', 'ⲩ'), ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'),
+  ('ⲱ', 'ⲱ'), ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'),
+  ('ⲹ', 'ⲹ'), ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'),
+  ('ⳁ', 'ⳁ'), ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'),
+  ('ⳉ', 'ⳉ'), ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'),
+  ('ⳑ', 'ⳑ'), ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'),
+  ('ⳙ', 'ⳙ'), ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'),
+  ('ⳡ', 'ⳡ'), ('ⳣ', 'ⳣ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'),
+  ('ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ꙁ', 'ꙁ'), ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'),
+  ('ꙉ', 'ꙉ'), ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'),
+  ('ꙑ', 'ꙑ'), ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'),
+  ('ꙙ', 'ꙙ'), ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'),
+  ('ꙡ', 'ꙡ'), ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'),
+  ('ꙩ', 'ꙩ'), ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'),
+  ('ꚃ', 'ꚃ'), ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'),
+  ('ꚋ', 'ꚋ'), ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'),
+  ('ꚓ', 'ꚓ'), ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'),
+  ('ꚛ', 'ꚛ'), ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'),
+  ('ꜩ', 'ꜩ'), ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜯ'),
+  ('ꜳ', 'ꜳ'), ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'),
+  ('ꜻ', 'ꜻ'), ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'),
+  ('ꝃ', 'ꝃ'), ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'),
+  ('ꝋ', 'ꝋ'), ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'),
+  ('ꝓ', 'ꝓ'), ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'),
+  ('ꝛ', 'ꝛ'), ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'),
+  ('ꝣ', 'ꝣ'), ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'),
+  ('ꝫ', 'ꝫ'), ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝺ', 'ꝺ'),
+  ('ꝼ', 'ꝼ'), ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'),
+  ('ꞅ', 'ꞅ'), ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞑ', 'ꞑ'),
+  ('ꞓ', 'ꞓ'), ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'),
+  ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'),
+  ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'),
+  ('ꞷ', 'ꞷ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'),
+  ('ﬓ', 'ﬗ'), ('ａ', 'ｚ'), ('𐐨', '𐑏'), ('𐓘', '𐓻'),
+  ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𞤢', '𞥃'),
+];
+
+pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ĳ', 'ĳ'),
+  ('ĵ', 'ĵ'), ('ķ', 'ķ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ŉ'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƌ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƚ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƭ', 'ƭ'), ('ư', 'ư'),
+  ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƹ'), ('ƽ', 'ƽ'), ('ƿ', 'ƿ'),
+  ('ǅ', 'ǆ'), ('ǈ', 'ǉ'), ('ǋ', 'ǌ'), ('ǎ', 'ǎ'), ('ǐ', 'ǐ'),
+  ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'), ('ǚ', 'ǚ'),
+  ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'), ('ǥ', 'ǥ'),
+  ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'), ('ǯ', 'ǰ'),
+  ('ǲ', 'ǳ'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'), ('ǽ', 'ǽ'),
+  ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'), ('ȇ', 'ȇ'),
+  ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'), ('ȑ', 'ȑ'),
+  ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'), ('ț', 'ț'),
+  ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'), ('ȧ', 'ȧ'),
+  ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'), ('ȱ', 'ȱ'),
+  ('ȳ', 'ȳ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'), ('ɇ', 'ɇ'),
+  ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ɔ'), ('ɖ', 'ɗ'),
+  ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'), ('ɥ', 'ɦ'),
+  ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'), ('ɽ', 'ɽ'),
+  ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'), ('ʝ', 'ʞ'),
+  ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϻ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'),
+  ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'), ('ḇ', 'ḇ'),
+  ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'), ('ḏ', 'ḏ'),
+  ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'), ('ḗ', 'ḗ'),
+  ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'), ('ḟ', 'ḟ'),
+  ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'), ('ḧ', 'ḧ'),
+  ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'), ('ḯ', 'ḯ'),
+  ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'), ('ḷ', 'ḷ'),
+  ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'), ('ḿ', 'ḿ'),
+  ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'), ('ṇ', 'ṇ'),
+  ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'), ('ṏ', 'ṏ'),
+  ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'), ('ṗ', 'ṗ'),
+  ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'), ('ṟ', 'ṟ'),
+  ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'), ('ṧ', 'ṧ'),
+  ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'), ('ṯ', 'ṯ'),
+  ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'), ('ṷ', 'ṷ'),
+  ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'), ('ṿ', 'ṿ'),
+  ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'), ('ẇ', 'ẇ'),
+  ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'), ('ẏ', 'ẏ'),
+  ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẛ'), ('ạ', 'ạ'),
+  ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'), ('ẩ', 'ẩ'),
+  ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'), ('ằ', 'ằ'),
+  ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'), ('ẹ', 'ẹ'),
+  ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'), ('ề', 'ề'),
+  ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'), ('ỉ', 'ỉ'),
+  ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'), ('ố', 'ố'),
+  ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'), ('ộ', 'ộ'),
+  ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'), ('ỡ', 'ỡ'),
+  ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'), ('ứ', 'ứ'),
+  ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'), ('ự', 'ự'),
+  ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'), ('ỹ', 'ỹ'),
+  ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'), ('ἐ', 'ἕ'),
+  ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'), ('ὐ', 'ὗ'),
+  ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾷ'),
+  ('ᾼ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'),
+  ('ῌ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'ῗ'), ('ῠ', 'ῧ'),
+  ('ῲ', 'ῴ'), ('ῶ', 'ῷ'), ('ῼ', 'ῼ'), ('ⅎ', 'ⅎ'),
+  ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), ('ⰰ', 'ⱞ'),
+  ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'),
+  ('ⱬ', 'ⱬ'), ('ⱳ', 'ⱳ'), ('ⱶ', 'ⱶ'), ('ⲁ', 'ⲁ'),
+  ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'), ('ⲉ', 'ⲉ'),
+  ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'), ('ⲑ', 'ⲑ'),
+  ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'), ('ⲙ', 'ⲙ'),
+  ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'), ('ⲡ', 'ⲡ'),
+  ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'), ('ⲩ', 'ⲩ'),
+  ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'), ('ⲱ', 'ⲱ'),
+  ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'), ('ⲹ', 'ⲹ'),
+  ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'), ('ⳁ', 'ⳁ'),
+  ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'), ('ⳉ', 'ⳉ'),
+  ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'), ('ⳑ', 'ⳑ'),
+  ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'), ('ⳙ', 'ⳙ'),
+  ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'), ('ⳡ', 'ⳡ'),
+  ('ⳣ', 'ⳣ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'), ('ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ꙁ', 'ꙁ'),
+  ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'), ('ꙉ', 'ꙉ'),
+  ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'), ('ꙑ', 'ꙑ'),
+  ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'), ('ꙙ', 'ꙙ'),
+  ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'), ('ꙡ', 'ꙡ'),
+  ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'), ('ꙩ', 'ꙩ'),
+  ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'), ('ꚃ', 'ꚃ'),
+  ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'), ('ꚋ', 'ꚋ'),
+  ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'), ('ꚓ', 'ꚓ'),
+  ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'), ('ꚛ', 'ꚛ'),
+  ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'), ('ꜩ', 'ꜩ'),
+  ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜯ'), ('ꜳ', 'ꜳ'),
+  ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'), ('ꜻ', 'ꜻ'),
+  ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'), ('ꝃ', 'ꝃ'),
+  ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'), ('ꝋ', 'ꝋ'),
+  ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'), ('ꝓ', 'ꝓ'),
+  ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'), ('ꝛ', 'ꝛ'),
+  ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'), ('ꝣ', 'ꝣ'),
+  ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'), ('ꝫ', 'ꝫ'),
+  ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'),
+  ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'),
+  ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞑ', 'ꞑ'), ('ꞓ', 'ꞓ'),
+  ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'), ('ꞝ', 'ꞝ'),
+  ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'), ('ꞥ', 'ꞥ'),
+  ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'), ('ꞷ', 'ꞷ'),
+  ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'),
+  ('ａ', 'ｚ'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), ('𐳀', '𐳲'),
+  ('𑣀', '𑣟'), ('𞤢', '𞥃'),
+];
+
+pub const DASH: &'static [(char, char)] = &[
+  ('-', '-'), ('֊', '֊'), ('־', '־'), ('᐀', '᐀'), ('᠆', '᠆'),
+  ('‐', '―'), ('⁓', '⁓'), ('⁻', '⁻'), ('₋', '₋'),
+  ('−', '−'), ('⸗', '⸗'), ('⸚', '⸚'), ('⸺', '⸻'),
+  ('⹀', '⹀'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'),
+  ('︱', '︲'), ('﹘', '﹘'), ('﹣', '﹣'), ('－', '－'),
+];
+
+pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+  ('\u{ad}', '\u{ad}'), ('͏', '͏'), ('\u{61c}', '\u{61c}'), ('ᅟ', 'ᅠ'),
+  ('឴', '឵'), ('᠋', '\u{180e}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'), ('ㅤ', 'ㅤ'),
+  ('︀', '️'), ('\u{feff}', '\u{feff}'), ('ﾠ', 'ﾠ'),
+  ('\u{fff0}', '\u{fff8}'), ('\u{1bca0}', '\u{1bca3}'),
+  ('\u{1d173}', '\u{1d17a}'), ('\u{e0000}', '\u{e0fff}'),
+];
+
+pub const DEPRECATED: &'static [(char, char)] = &[
+  ('ŉ', 'ŉ'), ('ٳ', 'ٳ'), ('ཷ', 'ཷ'), ('ཹ', 'ཹ'), ('ឣ', 'ឤ'),
+  ('\u{206a}', '\u{206f}'), ('〈', '〉'), ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const DIACRITIC: &'static [(char, char)] = &[
+  ('^', '^'), ('`', '`'), ('¨', '¨'), ('¯', '¯'), ('´', '´'),
+  ('·', '¸'), ('ʰ', '͎'), ('͐', '͗'), ('͝', '͢'), ('ʹ', '͵'),
+  ('ͺ', 'ͺ'), ('΄', '΅'), ('҃', '҇'), ('ՙ', 'ՙ'), ('֑', '֡'),
+  ('֣', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'), ('ׄ', 'ׄ'), ('ً', 'ْ'),
+  ('ٗ', '٘'), ('۟', '۠'), ('ۥ', 'ۦ'), ('۪', '۬'), ('ܰ', '݊'),
+  ('ަ', 'ް'), ('߫', 'ߵ'), ('࠘', '࠙'), ('ࣣ', 'ࣾ'), ('़', '़'),
+  ('्', '्'), ('॑', '॔'), ('ॱ', 'ॱ'), ('়', '়'),
+  ('্', '্'), ('਼', '਼'), ('੍', '੍'), ('઼', '઼'),
+  ('્', '્'), ('૽', '૿'), ('଼', '଼'), ('୍', '୍'),
+  ('்', '்'), ('్', '్'), ('಼', '಼'), ('್', '್'),
+  ('഻', '഼'), ('്', '്'), ('්', '්'), ('็', '์'),
+  ('๎', '๎'), ('່', '໌'), ('༘', '༙'), ('༵', '༵'),
+  ('༷', '༷'), ('༹', '༹'), ('༾', '༿'), ('ྂ', '྄'),
+  ('྆', '྇'), ('࿆', '࿆'), ('့', '့'), ('္', '်'),
+  ('ႇ', 'ႍ'), ('ႏ', 'ႏ'), ('ႚ', 'ႛ'), ('៉', '៓'),
+  ('៝', '៝'), ('᤹', '᤻'), ('᩵', '᩼'), ('᩿', '᩿'),
+  ('᪰', '᪽'), ('᬴', '᬴'), ('᭄', '᭄'), ('᭫', '᭳'),
+  ('᮪', '᮫'), ('ᰶ', '᰷'), ('ᱸ', 'ᱽ'), ('᳐', '᳨'),
+  ('᳭', '᳭'), ('᳴', '᳴'), ('᳷', '᳹'), ('ᴬ', 'ᵪ'),
+  ('᷄', '᷏'), ('᷵', '᷹'), ('᷽', '᷿'), ('᾽', '᾽'),
+  ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'), ('῭', '`'),
+  ('´', '῾'), ('⳯', '⳱'), ('ⸯ', 'ⸯ'), ('〪', '〯'),
+  ('゙', '゜'), ('ー', 'ー'), ('꙯', '꙯'), ('꙼', '꙽'),
+  ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚝ'), ('꛰', '꛱'), ('ꜗ', '꜡'),
+  ('ꞈ', 'ꞈ'), ('ꟸ', 'ꟹ'), ('꣄', '꣄'), ('꣠', '꣱'),
+  ('꤫', '꤮'), ('꥓', '꥓'), ('꦳', '꦳'), ('꧀', '꧀'),
+  ('ꧥ', 'ꧥ'), ('ꩻ', 'ꩽ'), ('꪿', 'ꫂ'), ('꫶', '꫶'),
+  ('꭛', 'ꭟ'), ('꯬', '꯭'), ('ﬞ', 'ﬞ'), ('︠', '︯'),
+  ('＾', '＾'), ('｀', '｀'), ('ｰ', 'ｰ'), ('ﾞ', 'ﾟ'),
+  ('￣', '￣'), ('𐋠', '𐋠'), ('𐫥', '𐫦'), ('𑂹', '𑂺'),
+  ('𑄳', '𑄴'), ('𑅳', '𑅳'), ('𑇀', '𑇀'), ('𑇊', '𑇌'),
+  ('𑈵', '𑈶'), ('𑋩', '𑋪'), ('𑌼', '𑌼'), ('𑍍', '𑍍'),
+  ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑑂', '𑑂'), ('𑑆', '𑑆'),
+  ('𑓂', '𑓃'), ('𑖿', '𑗀'), ('𑘿', '𑘿'), ('𑚶', '𑚷'),
+  ('𑜫', '𑜫'), ('𑨴', '𑨴'), ('𑩇', '𑩇'), ('𑪙', '𑪙'),
+  ('𑰿', '𑰿'), ('𑵂', '𑵂'), ('𑵄', '𑵅'), ('𖫰', '𖫴'),
+  ('𖾏', '𖾟'), ('𝅧', '𝅩'), ('𝅭', '𝅲'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𞣐', '𞣖'), ('𞥄', '𞥆'),
+  ('𞥈', '𞥊'),
+];
+
+pub const EXTENDER: &'static [(char, char)] = &[
+  ('·', '·'), ('ː', 'ˑ'), ('ـ', 'ـ'), ('ߺ', 'ߺ'), ('ๆ', 'ๆ'),
+  ('ໆ', 'ໆ'), ('᠊', '᠊'), ('ᡃ', 'ᡃ'), ('ᪧ', 'ᪧ'),
+  ('ᰶ', 'ᰶ'), ('ᱻ', 'ᱻ'), ('々', '々'), ('〱', '〵'),
+  ('ゝ', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'), ('ꘌ', 'ꘌ'),
+  ('ꧏ', 'ꧏ'), ('ꧦ', 'ꧦ'), ('ꩰ', 'ꩰ'), ('ꫝ', 'ꫝ'),
+  ('ꫳ', 'ꫴ'), ('ｰ', 'ｰ'), ('𑍝', '𑍝'), ('𑗆', '𑗈'),
+  ('𑪘', '𑪘'), ('𖭂', '𖭃'), ('𖿠', '𖿡'), ('𞥄', '𞥆'),
+];
+
+pub const GRAPHEME_BASE: &'static [(char, char)] = &[
+  (' ', '~'), ('\u{a0}', '¬'), ('®', '˿'), ('Ͱ', 'ͷ'), ('ͺ', 'Ϳ'),
+  ('΄', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', '҂'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '֊'), ('֍', '֏'),
+  ('־', '־'), ('׀', '׀'), ('׃', '׃'), ('׆', '׆'), ('א', 'ת'),
+  ('װ', '״'), ('؆', '؏'), ('؛', '؛'), ('؞', 'ي'), ('٠', 'ٯ'),
+  ('ٱ', 'ە'), ('۞', '۞'), ('ۥ', 'ۦ'), ('۩', '۩'), ('ۮ', '܍'),
+  ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('߀', 'ߪ'),
+  ('ߴ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'),
+  ('ࠨ', 'ࠨ'), ('࠰', '࠾'), ('ࡀ', 'ࡘ'), ('࡞', '࡞'),
+  ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ः', 'ह'),
+  ('ऻ', 'ऻ'), ('ऽ', 'ी'), ('ॉ', 'ौ'), ('ॎ', 'ॐ'),
+  ('क़', 'ॡ'), ('।', 'ঀ'), ('ং', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ি', 'ী'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
+  ('০', '৽'), ('ਃ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
+  ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
+  ('ਸ', 'ਹ'), ('ਾ', 'ੀ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('੦', '੯'), ('ੲ', 'ੴ'), ('ઃ', 'ઃ'), ('અ', 'ઍ'),
+  ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'),
+  ('વ', 'હ'), ('ઽ', 'ી'), ('ૉ', 'ૉ'), ('ો', 'ૌ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('૦', '૱'), ('ૹ', 'ૹ'),
+  ('ଂ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'), ('ଽ', 'ଽ'),
+  ('ୀ', 'ୀ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'), ('ଡ଼', 'ଢ଼'),
+  ('ୟ', 'ୡ'), ('୦', '୷'), ('ஃ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'),
+  ('ி', 'ி'), ('ு', 'ூ'), ('ெ', 'ை'), ('ொ', 'ௌ'),
+  ('ௐ', 'ௐ'), ('௦', '௺'), ('ఁ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
+  ('ు', 'ౄ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('౦', '౯'),
+  ('౸', 'ಀ'), ('ಂ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಾ'),
+  ('ೀ', 'ು'), ('ೃ', 'ೄ'), ('ೇ', 'ೈ'), ('ೊ', 'ೋ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), ('ೱ', 'ೲ'),
+  ('ം', 'ഃ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ി', 'ീ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'),
+  ('ൎ', '൏'), ('ൔ', 'ൖ'), ('൘', 'ൡ'), ('൦', 'ൿ'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ැ', 'ෑ'), ('ෘ', 'ෞ'),
+  ('෦', '෯'), ('ෲ', '෴'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('฿', 'ๆ'), ('๏', '๛'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'),
+  ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'),
+  ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'),
+  ('ສ', 'ຫ'), ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('໐', '໙'), ('ໜ', 'ໟ'),
+  ('ༀ', '༗'), ('༚', '༴'), ('༶', '༶'), ('༸', '༸'),
+  ('༺', 'ཇ'), ('ཉ', 'ཬ'), ('ཿ', 'ཿ'), ('྅', '྅'),
+  ('ྈ', 'ྌ'), ('྾', '࿅'), ('࿇', '࿌'), ('࿎', '࿚'),
+  ('က', 'ာ'), ('ေ', 'ေ'), ('း', 'း'), ('ျ', 'ြ'),
+  ('ဿ', 'ၗ'), ('ၚ', 'ၝ'), ('ၡ', 'ၰ'), ('ၵ', 'ႁ'),
+  ('ႃ', 'ႄ'), ('ႇ', 'ႌ'), ('ႎ', 'ႜ'), ('႞', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፠', '፼'), ('ᎀ', '᎙'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('᐀', '᚜'), ('ᚠ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('᜵', '᜶'),
+  ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ក', 'ឳ'),
+  ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), ('។', 'ៜ'),
+  ('០', '៩'), ('៰', '៹'), ('᠀', '᠊'), ('᠐', '᠙'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤣ', 'ᤦ'), ('ᤩ', 'ᤫ'),
+  ('ᤰ', 'ᤱ'), ('ᤳ', 'ᤸ'), ('᥀', '᥀'), ('᥄', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'),
+  ('᧞', 'ᨖ'), ('ᨙ', 'ᨚ'), ('᨞', 'ᩕ'), ('ᩗ', 'ᩗ'),
+  ('ᩡ', 'ᩡ'), ('ᩣ', 'ᩤ'), ('ᩭ', 'ᩲ'), ('᪀', '᪉'),
+  ('᪐', '᪙'), ('᪠', '᪭'), ('ᬄ', 'ᬳ'), ('ᬵ', 'ᬵ'),
+  ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), ('ᭃ', 'ᭋ'), ('᭐', '᭪'),
+  ('᭴', '᭼'), ('ᮂ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'),
+  ('ᮮ', 'ᯥ'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'),
+  ('᯲', '᯳'), ('᯼', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᰻', '᱉'),
+  ('ᱍ', 'ᲈ'), ('᳀', '᳇'), ('᳓', '᳓'), ('᳡', '᳡'),
+  ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳳ'), ('ᳵ', '᳷'), ('ᴀ', 'ᶿ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ῄ'), ('ῆ', 'ΐ'),
+  ('ῖ', 'Ί'), ('῝', '`'), ('ῲ', 'ῴ'), ('ῶ', '῾'),
+  ('\u{2000}', '\u{200a}'), ('‐', '‧'), ('\u{202f}', '\u{205f}'),
+  ('⁰', 'ⁱ'), ('⁴', '₎'), ('ₐ', 'ₜ'), ('₠', '₿'),
+  ('℀', '↋'), ('←', '␦'), ('⑀', '⑊'), ('①', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('⳹', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('⸀', '⹉'),
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'),
+  ('\u{3000}', '〩'), ('〰', '〿'), ('ぁ', 'ゖ'), ('゛', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('㆐', 'ㆺ'), ('㇀', '㇣'),
+  ('ㇰ', '㈞'), ('㈠', '㋾'), ('㌀', '䶵'), ('䷀', '鿪'),
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'), ('ꓐ', 'ꘫ'), ('Ꙁ', 'ꙮ'),
+  ('꙳', '꙳'), ('꙾', 'ꚝ'), ('ꚠ', 'ꛯ'), ('꛲', '꛷'),
+  ('꜀', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'),
+  ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠤ'), ('ꠧ', '꠫'), ('꠰', '꠹'),
+  ('ꡀ', '꡷'), ('ꢀ', 'ꣃ'), ('꣎', '꣙'), ('ꣲ', 'ꣽ'),
+  ('꤀', 'ꤥ'), ('꤮', 'ꥆ'), ('ꥒ', '꥓'), ('꥟', 'ꥼ'),
+  ('ꦃ', 'ꦲ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), ('ꦽ', '꧍'),
+  ('ꧏ', '꧙'), ('꧞', 'ꧤ'), ('ꧦ', 'ꧾ'), ('ꨀ', 'ꨨ'),
+  ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'),
+  ('ꩍ', 'ꩍ'), ('꩐', '꩙'), ('꩜', 'ꩻ'), ('ꩽ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫫ'), ('ꫮ', 'ꫵ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+  ('ꬰ', 'ꭥ'), ('ꭰ', 'ꯤ'), ('ꯦ', 'ꯧ'), ('ꯩ', '꯬'),
+  ('꯰', '꯹'), ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'),
+  ('豈', '舘'), ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'),
+  ('יִ', 'יִ'), ('ײַ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', '﯁'), ('ﯓ', '﴿'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('︐', '︙'),
+  ('︰', '﹒'), ('﹔', '﹦'), ('﹨', '﹫'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('！', 'ﾝ'), ('ﾠ', 'ﾾ'), ('ￂ', 'ￇ'),
+  ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'), ('￠', '￦'),
+  ('￨', '￮'), ('￼', '�'), ('𐀀', '𐀋'), ('𐀍', '𐀦'),
+  ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'),
+  ('𐂀', '𐃺'), ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐆎'),
+  ('𐆐', '𐆛'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐋡', '𐋻'), ('𐌀', '𐌣'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎟', '𐏃'), ('𐏈', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐕯', '𐕯'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡗', '𐢞'), ('𐢧', '𐢯'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐣻', '𐤛'), ('𐤟', '𐤹'), ('𐤿', '𐤿'), ('𐦀', '𐦷'),
+  ('𐦼', '𐧏'), ('𐧒', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐩀', '𐩇'), ('𐩐', '𐩘'), ('𐩠', '𐪟'),
+  ('𐫀', '𐫤'), ('𐫫', '𐫶'), ('𐬀', '𐬵'), ('𐬹', '𐭕'),
+  ('𐭘', '𐭲'), ('𐭸', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+  ('𐹠', '𐹾'), ('𑀀', '𑀀'), ('𑀂', '𑀷'), ('𑁇', '𑁍'),
+  ('𑁒', '𑁯'), ('𑂂', '𑂲'), ('𑂷', '𑂸'), ('𑂻', '𑂼'),
+  ('𑂾', '𑃁'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('𑄃', '𑄦'),
+  ('𑄬', '𑄬'), ('𑄶', '𑅃'), ('𑅐', '𑅲'), ('𑅴', '𑅶'),
+  ('𑆂', '𑆵'), ('𑆿', '𑇉'), ('𑇍', '𑇍'), ('𑇐', '𑇟'),
+  ('𑇡', '𑇴'), ('𑈀', '𑈑'), ('𑈓', '𑈮'), ('𑈲', '𑈳'),
+  ('𑈵', '𑈵'), ('𑈸', '𑈽'), ('𑊀', '𑊆'), ('𑊈', '𑊈'),
+  ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩'), ('𑊰', '𑋞'),
+  ('𑋠', '𑋢'), ('𑋰', '𑋹'), ('𑌂', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑌿', '𑌿'), ('𑍁', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍝', '𑍣'),
+  ('𑐀', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), ('𑑇', '𑑙'),
+  ('𑑛', '𑑛'), ('𑑝', '𑑝'), ('𑒀', '𑒯'), ('𑒱', '𑒲'),
+  ('𑒹', '𑒹'), ('𑒻', '𑒼'), ('𑒾', '𑒾'), ('𑓁', '𑓁'),
+  ('𑓄', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖮'), ('𑖰', '𑖱'),
+  ('𑖸', '𑖻'), ('𑖾', '𑖾'), ('𑗁', '𑗛'), ('𑘀', '𑘲'),
+  ('𑘻', '𑘼'), ('𑘾', '𑘾'), ('𑙁', '𑙄'), ('𑙐', '𑙙'),
+  ('𑙠', '𑙬'), ('𑚀', '𑚪'), ('𑚬', '𑚬'), ('𑚮', '𑚯'),
+  ('𑚶', '𑚶'), ('𑛀', '𑛉'), ('𑜀', '𑜙'), ('𑜠', '𑜡'),
+  ('𑜦', '𑜦'), ('𑜰', '𑜿'), ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨀'), ('𑨇', '𑨈'), ('𑨋', '𑨲'), ('𑨹', '𑨺'),
+  ('𑨿', '𑩆'), ('𑩐', '𑩐'), ('𑩗', '𑩘'), ('𑩜', '𑪃'),
+  ('𑪆', '𑪉'), ('𑪗', '𑪗'), ('𑪚', '𑪜'), ('𑪞', '𑪢'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰯'), ('𑰾', '𑰾'),
+  ('𑱀', '𑱅'), ('𑱐', '𑱬'), ('𑱰', '𑲏'), ('𑲩', '𑲩'),
+  ('𑲱', '𑲱'), ('𑲴', '𑲴'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𑵐', '𑵙'), ('𒀀', '𒎙'),
+  ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'),
+  ('𖩮', '𖩯'), ('𖫐', '𖫭'), ('𖫵', '𖫵'), ('𖬀', '𖬯'),
+  ('𖬷', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲜', '𛲜'), ('𛲟', '𛲟'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅦', '𝅦'), ('𝅪', '𝅭'),
+  ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝈀', '𝉁'),
+  ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝍠', '𝍱'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝧿'),
+  ('𝨷', '𝨺'), ('𝩭', '𝩴'), ('𝩶', '𝪃'), ('𝪅', '𝪋'),
+  ('𞠀', '𞣄'), ('𞣇', '𞣏'), ('𞤀', '𞥃'), ('𞥐', '𞥙'),
+  ('𞥞', '𞥟'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄀', '🄌'), ('🄐', '🄮'), ('🄰', '🅫'),
+  ('🅰', '🆬'), ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'),
+  ('🉐', '🉑'), ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'),
+  ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'),
+  ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'),
+  ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'),
+  ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'),
+];
+
+pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҉'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ं'), ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'),
+  ('्', '्'), ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('া', 'া'), ('ু', 'ৄ'), ('্', '্'),
+  ('ৗ', 'ৗ'), ('ৢ', 'ৣ'), ('ਁ', 'ਂ'), ('਼', '਼'),
+  ('ੁ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'),
+  ('ੰ', 'ੱ'), ('ੵ', 'ੵ'), ('ઁ', 'ં'), ('઼', '઼'),
+  ('ુ', 'ૅ'), ('ે', 'ૈ'), ('્', '્'), ('ૢ', 'ૣ'),
+  ('ૺ', '૿'), ('ଁ', 'ଁ'), ('଼', '଼'), ('ା', 'ି'),
+  ('ୁ', 'ୄ'), ('୍', '୍'), ('ୖ', 'ୗ'), ('ୢ', 'ୣ'),
+  ('ஂ', 'ஂ'), ('ா', 'ா'), ('ீ', 'ீ'), ('்', '்'),
+  ('ௗ', 'ௗ'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೂ', 'ೂ'), ('ೆ', 'ೆ'),
+  ('ೌ', '್'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'), ('ഀ', 'ഁ'),
+  ('഻', '഼'), ('ാ', 'ാ'), ('ു', 'ൄ'), ('്', '്'),
+  ('ൗ', 'ൗ'), ('ൢ', 'ൣ'), ('්', '්'), ('ා', 'ා'),
+  ('ි', 'ු'), ('ූ', 'ූ'), ('ෟ', 'ෟ'), ('ั', 'ั'),
+  ('ิ', 'ฺ'), ('็', '๎'), ('ັ', 'ັ'), ('ິ', 'ູ'),
+  ('ົ', 'ຼ'), ('່', 'ໍ'), ('༘', '༙'), ('༵', '༵'),
+  ('༷', '༷'), ('༹', '༹'), ('ཱ', 'ཾ'), ('ྀ', '྄'),
+  ('྆', '྇'), ('ྍ', 'ྗ'), ('ྙ', 'ྼ'), ('࿆', '࿆'),
+  ('ိ', 'ူ'), ('ဲ', '့'), ('္', '်'), ('ွ', 'ှ'),
+  ('ၘ', 'ၙ'), ('ၞ', 'ၠ'), ('ၱ', 'ၴ'), ('ႂ', 'ႂ'),
+  ('ႅ', 'ႆ'), ('ႍ', 'ႍ'), ('ႝ', 'ႝ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '឵'), ('ិ', 'ួ'), ('ំ', 'ំ'), ('៉', '៓'),
+  ('៝', '៝'), ('᠋', '᠍'), ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'),
+  ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'), ('ᤲ', 'ᤲ'), ('᤹', '᤻'),
+  ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'), ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'),
+  ('᩠', '᩠'), ('ᩢ', 'ᩢ'), ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'),
+  ('᩿', '᩿'), ('᪰', '᪾'), ('ᬀ', 'ᬃ'), ('᬴', '᬴'),
+  ('ᬶ', 'ᬺ'), ('ᬼ', 'ᬼ'), ('ᭂ', 'ᭂ'), ('᭫', '᭳'),
+  ('ᮀ', 'ᮁ'), ('ᮢ', 'ᮥ'), ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'),
+  ('᯦', '᯦'), ('ᯨ', 'ᯩ'), ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'),
+  ('ᰬ', 'ᰳ'), ('ᰶ', '᰷'), ('᳐', '᳒'), ('᳔', '᳠'),
+  ('᳢', '᳨'), ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'),
+  ('᷀', '᷹'), ('᷻', '᷿'), ('\u{200c}', '\u{200c}'), ('⃐', '⃰'),
+  ('⳯', '⳱'), ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〯'),
+  ('゙', '゚'), ('꙯', '꙲'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'),
+  ('꛰', '꛱'), ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'),
+  ('ꠥ', 'ꠦ'), ('꣄', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'),
+  ('ꥇ', 'ꥑ'), ('ꦀ', 'ꦂ'), ('꦳', '꦳'), ('ꦶ', 'ꦹ'),
+  ('ꦼ', 'ꦼ'), ('ꧥ', 'ꧥ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'),
+  ('ꨵ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩼ', 'ꩼ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫬ', 'ꫭ'), ('꫶', '꫶'), ('ꯥ', 'ꯥ'),
+  ('ꯨ', 'ꯨ'), ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'),
+  ('︠', '︯'), ('ﾞ', 'ﾟ'), ('𐇽', '𐇽'), ('𐋠', '𐋠'),
+  ('𐍶', '𐍺'), ('𐨁', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨏'),
+  ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐫥', '𐫦'), ('𑀁', '𑀁'),
+  ('𑀸', '𑁆'), ('𑁿', '𑂁'), ('𑂳', '𑂶'), ('𑂹', '𑂺'),
+  ('𑄀', '𑄂'), ('𑄧', '𑄫'), ('𑄭', '𑄴'), ('𑅳', '𑅳'),
+  ('𑆀', '𑆁'), ('𑆶', '𑆾'), ('𑇊', '𑇌'), ('𑈯', '𑈱'),
+  ('𑈴', '𑈴'), ('𑈶', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋟'),
+  ('𑋣', '𑋪'), ('𑌀', '𑌁'), ('𑌼', '𑌼'), ('𑌾', '𑌾'),
+  ('𑍀', '𑍀'), ('𑍗', '𑍗'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+  ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'), ('𑒰', '𑒰'),
+  ('𑒳', '𑒸'), ('𑒺', '𑒺'), ('𑒽', '𑒽'), ('𑒿', '𑓀'),
+  ('𑓂', '𑓃'), ('𑖯', '𑖯'), ('𑖲', '𑖵'), ('𑖼', '𑖽'),
+  ('𑖿', '𑗀'), ('𑗜', '𑗝'), ('𑘳', '𑘺'), ('𑘽', '𑘽'),
+  ('𑘿', '𑙀'), ('𑚫', '𑚫'), ('𑚭', '𑚭'), ('𑚰', '𑚵'),
+  ('𑚷', '𑚷'), ('𑜝', '𑜟'), ('𑜢', '𑜥'), ('𑜧', '𑜫'),
+  ('𑨁', '𑨆'), ('𑨉', '𑨊'), ('𑨳', '𑨸'), ('𑨻', '𑨾'),
+  ('𑩇', '𑩇'), ('𑩑', '𑩖'), ('𑩙', '𑩛'), ('𑪊', '𑪖'),
+  ('𑪘', '𑪙'), ('𑰰', '𑰶'), ('𑰸', '𑰽'), ('𑰿', '𑰿'),
+  ('𑲒', '𑲧'), ('𑲪', '𑲰'), ('𑲲', '𑲳'), ('𑲵', '𑲶'),
+  ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵅'),
+  ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'), ('𖾏', '𖾒'),
+  ('𛲝', '𛲞'), ('𝅥', '𝅥'), ('𝅧', '𝅩'), ('𝅮', '𝅲'),
+  ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'),
+  ('𞥄', '𞥊'), ('\u{e0020}', '\u{e007f}'), ('󠄀', '󠇯'),
+];
+
+pub const GRAPHEME_LINK: &'static [(char, char)] = &[
+  ('्', '्'), ('্', '্'), ('੍', '੍'), ('્', '્'),
+  ('୍', '୍'), ('்', '்'), ('్', '్'), ('್', '್'),
+  ('഻', '഼'), ('്', '്'), ('්', '්'), ('ฺ', 'ฺ'),
+  ('྄', '྄'), ('္', '်'), ('᜔', '᜔'), ('᜴', '᜴'),
+  ('្', '្'), ('᩠', '᩠'), ('᭄', '᭄'), ('᮪', '᮫'),
+  ('᯲', '᯳'), ('⵿', '⵿'), ('꠆', '꠆'), ('꣄', '꣄'),
+  ('꥓', '꥓'), ('꧀', '꧀'), ('꫶', '꫶'), ('꯭', '꯭'),
+  ('𐨿', '𐨿'), ('𑁆', '𑁆'), ('𑁿', '𑁿'), ('𑂹', '𑂹'),
+  ('𑄳', '𑄴'), ('𑇀', '𑇀'), ('𑈵', '𑈵'), ('𑋪', '𑋪'),
+  ('𑍍', '𑍍'), ('𑑂', '𑑂'), ('𑓂', '𑓂'), ('𑖿', '𑖿'),
+  ('𑘿', '𑘿'), ('𑚶', '𑚶'), ('𑜫', '𑜫'), ('𑨴', '𑨴'),
+  ('𑩇', '𑩇'), ('𑪙', '𑪙'), ('𑰿', '𑰿'), ('𑵄', '𑵅'),
+];
+
+pub const HEX_DIGIT: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'F'), ('a', 'f'), ('０', '９'), ('Ａ', 'Ｆ'),
+  ('ａ', 'ｆ'),
+];
+
+pub const HYPHEN: &'static [(char, char)] = &[
+  ('-', '-'), ('\u{ad}', '\u{ad}'), ('֊', '֊'), ('᠆', '᠆'),
+  ('‐', '‑'), ('⸗', '⸗'), ('・', '・'), ('﹣', '﹣'),
+  ('－', '－'), ('･', '･'),
+];
+
+pub const IDS_BINARY_OPERATOR: &'static [(char, char)] = &[
+  ('⿰', '⿱'), ('⿴', '⿻'),
+];
+
+pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[
+  ('⿲', '⿳'),
+];
+
+pub const ID_CONTINUE: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'),
+  ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'),
+  ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', '҇'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'),
+  ('ׁ', 'ׂ'), ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'),
+  ('ؐ', 'ؚ'), ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'),
+  ('۪', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'),
+  ('ߺ', 'ߺ'), ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'),
+  ('०', '९'), ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'),
+  ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'),
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૯'), ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'),
+  ('ୱ', 'ୱ'), ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'),
+  ('௦', '௯'), ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'),
+  ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'),
+  ('౦', '౯'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'),
+  ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', 'ൎ'), ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'),
+  ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', 'ෳ'), ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'),
+  ('༠', '༩'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'),
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፟'), ('፩', '፱'), ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'),
+  ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'),
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('ក', '៓'), ('ៗ', 'ៗ'), ('ៜ', '៝'),
+  ('០', '៩'), ('᠋', '᠍'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'),
+  ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'),
+  ('ᤰ', '᤻'), ('᥆', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'),
+  ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪽'), ('ᬀ', 'ᭋ'), ('᭐', '᭙'), ('᭫', '᭳'),
+  ('ᮀ', '᯳'), ('ᰀ', '᰷'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('᳐', '᳒'), ('᳔', '᳹'), ('ᴀ', '᷹'),
+  ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('‿', '⁀'),
+  ('⁔', '⁔'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'),
+  ('ⵯ', 'ⵯ'), ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘫ'), ('Ꙁ', '꙯'), ('ꙴ', '꙽'), ('ꙿ', '꛱'),
+  ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'), ('꣐', '꣙'),
+  ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('꤀', '꤭'),
+  ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'), ('ꧏ', '꧙'),
+  ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'),
+  ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫯ'),
+  ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'),
+  ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'), ('가', '힣'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'),
+  ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'), ('שׁ', 'זּ'),
+  ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
+  ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'),
+  ('ﷰ', 'ﷻ'), ('︀', '️'), ('︠', '︯'), ('︳', '︴'),
+  ('﹍', '﹏'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('０', '９'),
+  ('Ａ', 'Ｚ'), ('＿', '＿'), ('ａ', 'ｚ'), ('ｦ', 'ﾾ'),
+  ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐇽', '𐇽'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐋠', '𐋠'),
+  ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍺'), ('𐎀', '𐎝'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'),
+  ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'),
+  ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'),
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'),
+  ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'),
+  ('𐨿', '𐨿'), ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'),
+  ('𐫉', '𐫦'), ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'),
+  ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'),
+  ('𑀀', '𑁆'), ('𑁦', '𑁯'), ('𑁿', '𑂺'), ('𑃐', '𑃨'),
+  ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑄿'), ('𑅐', '𑅳'),
+  ('𑅶', '𑅶'), ('𑆀', '𑇄'), ('𑇊', '𑇌'), ('𑇐', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈷'), ('𑈾', '𑈾'),
+  ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
+  ('𑊟', '𑊨'), ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌀', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'),
+  ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐀', '𑑊'), ('𑑐', '𑑙'),
+  ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'),
+  ('𑖸', '𑗀'), ('𑗘', '𑗝'), ('𑘀', '𑙀'), ('𑙄', '𑙄'),
+  ('𑙐', '𑙙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𑜀', '𑜙'),
+  ('𑜝', '𑜫'), ('𑜰', '𑜹'), ('𑢠', '𑣩'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨾'), ('𑩇', '𑩇'), ('𑩐', '𑪃'), ('𑪆', '𑪙'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱀'),
+  ('𑱐', '𑱙'), ('𑱲', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'), ('𒀀', '𒎙'),
+  ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'),
+  ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖫐', '𖫭'),
+  ('𖫰', '𖫴'), ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭐', '𖭙'),
+  ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'),
+  ('𖾏', '𖾟'), ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+  ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'),
+  ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲝', '𛲞'), ('𝅥', '𝅩'),
+  ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'),
+  ('𝉂', '𝉄'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞠀', '𞣄'),
+  ('𞣐', '𞣖'), ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞸀', '𞸃'),
+  ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'),
+  ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'),
+  ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'),
+  ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'),
+  ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'),
+  ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'),
+  ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'),
+  ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
+
+pub const ID_START: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'),
+  ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'),
+  ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('々', '〇'),
+  ('〡', '〩'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゛', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'),
+  ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'),
+  ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'),
+  ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('Ａ', 'Ｚ'), ('ａ', 'ｚ'), ('ｦ', 'ﾾ'),
+  ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨀'),
+  ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐩠', '𐩼'),
+  ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'), ('𐬀', '𐬵'),
+  ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀃', '𑀷'), ('𑂃', '𑂯'),
+  ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'),
+  ('𑆃', '𑆲'), ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'),
+  ('𑈀', '𑈑'), ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'),
+  ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'),
+  ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'),
+  ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'),
+  ('𑘀', '𑘯'), ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'),
+  ('𑢠', '𑣟'), ('𑣿', '𑣿'), ('𑨀', '𑨀'), ('𑨋', '𑨲'),
+  ('𑨺', '𑨺'), ('𑩐', '𑩐'), ('𑩜', '𑪃'), ('𑪆', '𑪉'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'),
+  ('𑱲', '𑲏'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'),
+  ('𑵆', '𑵆'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'),
+  ('𞤀', '𞥃'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const IDEOGRAPHIC: &'static [(char, char)] = &[
+  ('〆', '〇'), ('〡', '〩'), ('〸', '〺'), ('㐀', '䶵'),
+  ('一', '鿪'), ('豈', '舘'), ('並', '龎'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛅰', '𛋻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const JOIN_CONTROL: &'static [(char, char)] = &[
+  ('\u{200c}', '\u{200d}'),
+];
+
+pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[
+  ('เ', 'ไ'), ('ເ', 'ໄ'), ('ᦵ', 'ᦷ'), ('ᦺ', 'ᦺ'),
+  ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪹ'), ('ꪻ', 'ꪼ'),
+];
+
+pub const LOWERCASE: &'static [(char, char)] = &[
+  ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'), ('ß', 'ö'),
+  ('ø', 'ÿ'), ('ā', 'ā'), ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'),
+  ('ĉ', 'ĉ'), ('ċ', 'ċ'), ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'),
+  ('ē', 'ē'), ('ĕ', 'ĕ'), ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'),
+  ('ĝ', 'ĝ'), ('ğ', 'ğ'), ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'),
+  ('ħ', 'ħ'), ('ĩ', 'ĩ'), ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'),
+  ('ı', 'ı'), ('ĳ', 'ĳ'), ('ĵ', 'ĵ'), ('ķ', 'ĸ'), ('ĺ', 'ĺ'),
+  ('ļ', 'ļ'), ('ľ', 'ľ'), ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'),
+  ('ņ', 'ņ'), ('ň', 'ŉ'), ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'),
+  ('ő', 'ő'), ('œ', 'œ'), ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'),
+  ('ś', 'ś'), ('ŝ', 'ŝ'), ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'),
+  ('ť', 'ť'), ('ŧ', 'ŧ'), ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'),
+  ('ů', 'ů'), ('ű', 'ű'), ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'),
+  ('ź', 'ź'), ('ż', 'ż'), ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'),
+  ('ƈ', 'ƈ'), ('ƌ', 'ƍ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƛ'),
+  ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'),
+  ('ƪ', 'ƫ'), ('ƭ', 'ƭ'), ('ư', 'ư'), ('ƴ', 'ƴ'), ('ƶ', 'ƶ'),
+  ('ƹ', 'ƺ'), ('ƽ', 'ƿ'), ('ǆ', 'ǆ'), ('ǉ', 'ǉ'), ('ǌ', 'ǌ'),
+  ('ǎ', 'ǎ'), ('ǐ', 'ǐ'), ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'),
+  ('ǘ', 'ǘ'), ('ǚ', 'ǚ'), ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'),
+  ('ǣ', 'ǣ'), ('ǥ', 'ǥ'), ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'),
+  ('ǭ', 'ǭ'), ('ǯ', 'ǰ'), ('ǳ', 'ǳ'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'),
+  ('ǻ', 'ǻ'), ('ǽ', 'ǽ'), ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'),
+  ('ȅ', 'ȅ'), ('ȇ', 'ȇ'), ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'),
+  ('ȏ', 'ȏ'), ('ȑ', 'ȑ'), ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'),
+  ('ș', 'ș'), ('ț', 'ț'), ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȡ', 'ȡ'),
+  ('ȣ', 'ȣ'), ('ȥ', 'ȥ'), ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'),
+  ('ȭ', 'ȭ'), ('ȯ', 'ȯ'), ('ȱ', 'ȱ'), ('ȳ', 'ȹ'), ('ȼ', 'ȼ'),
+  ('ȿ', 'ɀ'), ('ɂ', 'ɂ'), ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'),
+  ('ɍ', 'ɍ'), ('ɏ', 'ʓ'), ('ʕ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'),
+  ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϼ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'),
+  ('ḃ', 'ḃ'), ('ḅ', 'ḅ'), ('ḇ', 'ḇ'), ('ḉ', 'ḉ'),
+  ('ḋ', 'ḋ'), ('ḍ', 'ḍ'), ('ḏ', 'ḏ'), ('ḑ', 'ḑ'),
+  ('ḓ', 'ḓ'), ('ḕ', 'ḕ'), ('ḗ', 'ḗ'), ('ḙ', 'ḙ'),
+  ('ḛ', 'ḛ'), ('ḝ', 'ḝ'), ('ḟ', 'ḟ'), ('ḡ', 'ḡ'),
+  ('ḣ', 'ḣ'), ('ḥ', 'ḥ'), ('ḧ', 'ḧ'), ('ḩ', 'ḩ'),
+  ('ḫ', 'ḫ'), ('ḭ', 'ḭ'), ('ḯ', 'ḯ'), ('ḱ', 'ḱ'),
+  ('ḳ', 'ḳ'), ('ḵ', 'ḵ'), ('ḷ', 'ḷ'), ('ḹ', 'ḹ'),
+  ('ḻ', 'ḻ'), ('ḽ', 'ḽ'), ('ḿ', 'ḿ'), ('ṁ', 'ṁ'),
+  ('ṃ', 'ṃ'), ('ṅ', 'ṅ'), ('ṇ', 'ṇ'), ('ṉ', 'ṉ'),
+  ('ṋ', 'ṋ'), ('ṍ', 'ṍ'), ('ṏ', 'ṏ'), ('ṑ', 'ṑ'),
+  ('ṓ', 'ṓ'), ('ṕ', 'ṕ'), ('ṗ', 'ṗ'), ('ṙ', 'ṙ'),
+  ('ṛ', 'ṛ'), ('ṝ', 'ṝ'), ('ṟ', 'ṟ'), ('ṡ', 'ṡ'),
+  ('ṣ', 'ṣ'), ('ṥ', 'ṥ'), ('ṧ', 'ṧ'), ('ṩ', 'ṩ'),
+  ('ṫ', 'ṫ'), ('ṭ', 'ṭ'), ('ṯ', 'ṯ'), ('ṱ', 'ṱ'),
+  ('ṳ', 'ṳ'), ('ṵ', 'ṵ'), ('ṷ', 'ṷ'), ('ṹ', 'ṹ'),
+  ('ṻ', 'ṻ'), ('ṽ', 'ṽ'), ('ṿ', 'ṿ'), ('ẁ', 'ẁ'),
+  ('ẃ', 'ẃ'), ('ẅ', 'ẅ'), ('ẇ', 'ẇ'), ('ẉ', 'ẉ'),
+  ('ẋ', 'ẋ'), ('ẍ', 'ẍ'), ('ẏ', 'ẏ'), ('ẑ', 'ẑ'),
+  ('ẓ', 'ẓ'), ('ẕ', 'ẝ'), ('ẟ', 'ẟ'), ('ạ', 'ạ'),
+  ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'), ('ẩ', 'ẩ'),
+  ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'), ('ằ', 'ằ'),
+  ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'), ('ẹ', 'ẹ'),
+  ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'), ('ề', 'ề'),
+  ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'), ('ỉ', 'ỉ'),
+  ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'), ('ố', 'ố'),
+  ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'), ('ộ', 'ộ'),
+  ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'), ('ỡ', 'ỡ'),
+  ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'), ('ứ', 'ứ'),
+  ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'), ('ự', 'ự'),
+  ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'), ('ỹ', 'ỹ'),
+  ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'), ('ἐ', 'ἕ'),
+  ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'), ('ὐ', 'ὗ'),
+  ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾇ'), ('ᾐ', 'ᾗ'),
+  ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'), ('ᾶ', 'ᾷ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῇ'), ('ῐ', 'ΐ'), ('ῖ', 'ῗ'),
+  ('ῠ', 'ῧ'), ('ῲ', 'ῴ'), ('ῶ', 'ῷ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℊ', 'ℊ'), ('ℎ', 'ℏ'),
+  ('ℓ', 'ℓ'), ('ℯ', 'ℯ'), ('ℴ', 'ℴ'), ('ℹ', 'ℹ'),
+  ('ℼ', 'ℽ'), ('ⅆ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('ⅰ', 'ⅿ'),
+  ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), ('ⰰ', 'ⱞ'), ('ⱡ', 'ⱡ'),
+  ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'), ('ⱬ', 'ⱬ'),
+  ('ⱱ', 'ⱱ'), ('ⱳ', 'ⱴ'), ('ⱶ', 'ⱽ'), ('ⲁ', 'ⲁ'),
+  ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'), ('ⲉ', 'ⲉ'),
+  ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'), ('ⲑ', 'ⲑ'),
+  ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'), ('ⲙ', 'ⲙ'),
+  ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'), ('ⲡ', 'ⲡ'),
+  ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'), ('ⲩ', 'ⲩ'),
+  ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'), ('ⲱ', 'ⲱ'),
+  ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'), ('ⲹ', 'ⲹ'),
+  ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'), ('ⳁ', 'ⳁ'),
+  ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'), ('ⳉ', 'ⳉ'),
+  ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'), ('ⳑ', 'ⳑ'),
+  ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'), ('ⳙ', 'ⳙ'),
+  ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'), ('ⳡ', 'ⳡ'),
+  ('ⳣ', 'ⳤ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'), ('ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ꙁ', 'ꙁ'),
+  ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'), ('ꙉ', 'ꙉ'),
+  ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'), ('ꙑ', 'ꙑ'),
+  ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'), ('ꙙ', 'ꙙ'),
+  ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'), ('ꙡ', 'ꙡ'),
+  ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'), ('ꙩ', 'ꙩ'),
+  ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'), ('ꚃ', 'ꚃ'),
+  ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'), ('ꚋ', 'ꚋ'),
+  ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'), ('ꚓ', 'ꚓ'),
+  ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'), ('ꚛ', 'ꚝ'),
+  ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'), ('ꜩ', 'ꜩ'),
+  ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜱ'), ('ꜳ', 'ꜳ'),
+  ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'), ('ꜻ', 'ꜻ'),
+  ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'), ('ꝃ', 'ꝃ'),
+  ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'), ('ꝋ', 'ꝋ'),
+  ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'), ('ꝓ', 'ꝓ'),
+  ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'), ('ꝛ', 'ꝛ'),
+  ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'), ('ꝣ', 'ꝣ'),
+  ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'), ('ꝫ', 'ꝫ'),
+  ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝸ'), ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'),
+  ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'),
+  ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞎ', 'ꞎ'), ('ꞑ', 'ꞑ'),
+  ('ꞓ', 'ꞕ'), ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'),
+  ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'),
+  ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'),
+  ('ꞷ', 'ꞷ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꮿ'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('ａ', 'ｚ'),
+  ('𐐨', '𐑏'), ('𐓘', '𐓻'), ('𐳀', '𐳲'), ('𑣀', '𑣟'),
+  ('𝐚', '𝐳'), ('𝑎', '𝑔'), ('𝑖', '𝑧'), ('𝒂', '𝒛'),
+  ('𝒶', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝓏'),
+  ('𝓪', '𝔃'), ('𝔞', '𝔷'), ('𝕒', '𝕫'), ('𝖆', '𝖟'),
+  ('𝖺', '𝗓'), ('𝗮', '𝘇'), ('𝘢', '𝘻'), ('𝙖', '𝙯'),
+  ('𝚊', '𝚥'), ('𝛂', '𝛚'), ('𝛜', '𝛡'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜛'), ('𝜶', '𝝎'), ('𝝐', '𝝕'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞏'), ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'),
+  ('𞤢', '𞥃'),
+];
+
+pub const MATH: &'static [(char, char)] = &[
+  ('+', '+'), ('<', '>'), ('^', '^'), ('|', '|'), ('~', '~'), ('¬', '¬'),
+  ('±', '±'), ('×', '×'), ('÷', '÷'), ('ϐ', 'ϒ'), ('ϕ', 'ϕ'),
+  ('ϰ', 'ϱ'), ('ϴ', '϶'), ('؆', '؈'), ('‖', '‖'), ('′', '‴'),
+  ('⁀', '⁀'), ('⁄', '⁄'), ('⁒', '⁒'), ('\u{2061}', '\u{2064}'),
+  ('⁺', '⁾'), ('₊', '₎'), ('⃐', '⃜'), ('⃡', '⃡'),
+  ('⃥', '⃦'), ('⃫', '⃯'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'), ('ℤ', 'ℤ'),
+  ('ℨ', '℩'), ('ℬ', 'ℭ'), ('ℯ', 'ℱ'), ('ℳ', 'ℸ'),
+  ('ℼ', 'ⅉ'), ('⅋', '⅋'), ('←', '↧'), ('↩', '↮'),
+  ('↰', '↱'), ('↶', '↷'), ('↼', '⇛'), ('⇝', '⇝'),
+  ('⇤', '⇥'), ('⇴', '⋿'), ('⌈', '⌋'), ('⌠', '⌡'),
+  ('⍼', '⍼'), ('⎛', '⎵'), ('⎷', '⎷'), ('⏐', '⏐'),
+  ('⏜', '⏢'), ('■', '□'), ('▮', '▷'), ('▼', '◁'),
+  ('◆', '◇'), ('◊', '○'), ('●', '◓'), ('◢', '◢'),
+  ('◤', '◤'), ('◧', '◬'), ('◸', '◿'), ('★', '☆'),
+  ('♀', '♀'), ('♂', '♂'), ('♠', '♣'), ('♭', '♯'),
+  ('⟀', '⟿'), ('⤀', '⫿'), ('⬰', '⭄'), ('⭇', '⭌'),
+  ('﬩', '﬩'), ('﹡', '﹦'), ('﹨', '﹨'), ('＋', '＋'),
+  ('＜', '＞'), ('＼', '＼'), ('＾', '＾'), ('｜', '｜'),
+  ('～', '～'), ('￢', '￢'), ('￩', '￬'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝟿'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[
+  ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'),
+  ('\u{1fffe}', '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'),
+  ('\u{3fffe}', '\u{3ffff}'), ('\u{4fffe}', '\u{4ffff}'),
+  ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}', '\u{6ffff}'),
+  ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
+  ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'),
+  ('\u{bfffe}', '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'),
+  ('\u{dfffe}', '\u{dffff}'), ('\u{efffe}', '\u{effff}'),
+  ('\u{ffffe}', '\u{fffff}'), ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[
+  ('ͅ', 'ͅ'), ('ְ', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'), ('ׄ', 'ׅ'),
+  ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٗ'), ('ٙ', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('ۡ', 'ۤ'), ('ۧ', 'ۨ'), ('ۭ', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', 'ܿ'), ('ަ', 'ް'), ('ࠖ', 'ࠗ'), ('ࠛ', 'ࠣ'), ('ࠥ', 'ࠧ'),
+  ('ࠩ', 'ࠬ'), ('ࣔ', 'ࣟ'), ('ࣣ', 'ࣩ'), ('ࣰ', 'ः'),
+  ('ऺ', 'ऻ'), ('ा', 'ौ'), ('ॎ', 'ॏ'), ('ॕ', 'ॗ'),
+  ('ॢ', 'ॣ'), ('ঁ', 'ঃ'), ('া', 'ৄ'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৗ', 'ৗ'), ('ৢ', 'ৣ'), ('ਁ', 'ਃ'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', 'ੌ'), ('ੑ', 'ੑ'),
+  ('ੰ', 'ੱ'), ('ੵ', 'ੵ'), ('ઁ', 'ઃ'), ('ા', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', 'ૌ'), ('ૢ', 'ૣ'), ('ૺ', 'ૼ'),
+  ('ଁ', 'ଃ'), ('ା', 'ୄ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'),
+  ('ୖ', 'ୗ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', 'ௌ'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', 'ౌ'), ('ౕ', 'ౖ'),
+  ('ౢ', 'ౣ'), ('ಁ', 'ಃ'), ('ಾ', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', 'ೌ'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'), ('ഀ', 'ഃ'),
+  ('ാ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൗ', 'ൗ'),
+  ('ൢ', 'ൣ'), ('ං', 'ඃ'), ('ා', 'ු'), ('ූ', 'ූ'),
+  ('ෘ', 'ෟ'), ('ෲ', 'ෳ'), ('ั', 'ั'), ('ิ', 'ฺ'),
+  ('ํ', 'ํ'), ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'),
+  ('ໍ', 'ໍ'), ('ཱ', 'ཱྀ'), ('ྍ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('ါ', 'ံ'), ('း', 'း'), ('ျ', 'ှ'), ('ၖ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၢ', 'ၢ'), ('ၧ', 'ၨ'), ('ၱ', 'ၴ'),
+  ('ႂ', 'ႆ'), ('ႜ', 'ႝ'), ('፟', '፟'), ('ᜒ', 'ᜓ'),
+  ('ᜲ', 'ᜳ'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'), ('ា', 'ៈ'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤫ'), ('ᤰ', 'ᤸ'),
+  ('ᨗ', 'ᨛ'), ('ᩕ', 'ᩞ'), ('ᩡ', 'ᩴ'), ('ᬀ', 'ᬄ'),
+  ('ᬵ', 'ᭃ'), ('ᮀ', 'ᮂ'), ('ᮡ', 'ᮩ'), ('ᮬ', 'ᮭ'),
+  ('ᯧ', 'ᯱ'), ('ᰤ', 'ᰵ'), ('ᳲ', 'ᳳ'), ('ᷧ', 'ᷴ'),
+  ('Ⓐ', 'ⓩ'), ('ⷠ', 'ⷿ'), ('ꙴ', 'ꙻ'), ('ꚞ', 'ꚟ'),
+  ('ꠣ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), ('ꣅ', 'ꣅ'),
+  ('ꤦ', 'ꤪ'), ('ꥇ', 'ꥒ'), ('ꦀ', 'ꦃ'), ('ꦴ', 'ꦿ'),
+  ('ꨩ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩍ'), ('ꪰ', 'ꪰ'),
+  ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', 'ꪾ'), ('ꫫ', 'ꫯ'),
+  ('ꫵ', 'ꫵ'), ('ꯣ', 'ꯪ'), ('ﬞ', 'ﬞ'), ('𐍶', '𐍺'),
+  ('𐨁', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𑀀', '𑀂'),
+  ('𑀸', '𑁅'), ('𑂂', '𑂂'), ('𑂰', '𑂸'), ('𑄀', '𑄂'),
+  ('𑄧', '𑄲'), ('𑆀', '𑆂'), ('𑆳', '𑆿'), ('𑈬', '𑈴'),
+  ('𑈷', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋨'), ('𑌀', '𑌃'),
+  ('𑌾', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍌'), ('𑍗', '𑍗'),
+  ('𑍢', '𑍣'), ('𑐵', '𑑁'), ('𑑃', '𑑅'), ('𑒰', '𑓁'),
+  ('𑖯', '𑖵'), ('𑖸', '𑖾'), ('𑗜', '𑗝'), ('𑘰', '𑘾'),
+  ('𑙀', '𑙀'), ('𑚫', '𑚵'), ('𑜝', '𑜪'), ('𑨁', '𑨊'),
+  ('𑨵', '𑨹'), ('𑨻', '𑨾'), ('𑩑', '𑩛'), ('𑪊', '𑪗'),
+  ('𑰯', '𑰶'), ('𑰸', '𑰾'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵁'),
+  ('𑵃', '𑵃'), ('𑵇', '𑵇'), ('𖬰', '𖬶'), ('𖽑', '𖽾'),
+  ('𛲞', '𛲞'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞥇', '𞥇'), ('🄰', '🅉'),
+  ('🅐', '🅩'), ('🅰', '🆉'),
+];
+
+pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+  ('͏', '͏'), ('ᅟ', 'ᅠ'), ('឴', '឵'), ('\u{2065}', '\u{2065}'),
+  ('ㅤ', 'ㅤ'), ('ﾠ', 'ﾠ'), ('\u{fff0}', '\u{fff8}'),
+  ('\u{e0000}', '\u{e0000}'), ('\u{e0002}', '\u{e001f}'),
+  ('\u{e0080}', '\u{e00ff}'), ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[
+  ('া', 'া'), ('ৗ', 'ৗ'), ('ା', 'ା'), ('ୗ', 'ୗ'),
+  ('ா', 'ா'), ('ௗ', 'ௗ'), ('ೂ', 'ೂ'), ('ೕ', 'ೖ'),
+  ('ാ', 'ാ'), ('ൗ', 'ൗ'), ('ා', 'ා'), ('ෟ', 'ෟ'),
+  ('\u{200c}', '\u{200c}'), ('〮', '〯'), ('ﾞ', 'ﾟ'), ('𑌾', '𑌾'),
+  ('𑍗', '𑍗'), ('𑒰', '𑒰'), ('𑒽', '𑒽'), ('𑖯', '𑖯'),
+  ('𝅥', '𝅥'), ('𝅮', '𝅲'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[
+  ('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚'),
+];
+
+pub const OTHER_ID_START: &'static [(char, char)] = &[
+  ('ᢅ', 'ᢆ'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜'),
+];
+
+pub const OTHER_LOWERCASE: &'static [(char, char)] = &[
+  ('ª', 'ª'), ('º', 'º'), ('ʰ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'),
+  ('ͅ', 'ͅ'), ('ͺ', 'ͺ'), ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', 'ᶿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ⅰ', 'ⅿ'),
+  ('ⓐ', 'ⓩ'), ('ⱼ', 'ⱽ'), ('ꚜ', 'ꚝ'), ('ꝰ', 'ꝰ'),
+  ('ꟸ', 'ꟹ'), ('ꭜ', 'ꭟ'),
+];
+
+pub const OTHER_MATH: &'static [(char, char)] = &[
+  ('^', '^'), ('ϐ', 'ϒ'), ('ϕ', 'ϕ'), ('ϰ', 'ϱ'), ('ϴ', 'ϵ'),
+  ('‖', '‖'), ('′', '‴'), ('⁀', '⁀'), ('\u{2061}', '\u{2064}'),
+  ('⁽', '⁾'), ('₍', '₎'), ('⃐', '⃜'), ('⃡', '⃡'),
+  ('⃥', '⃦'), ('⃫', '⃯'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('ℨ', '℩'), ('ℬ', 'ℭ'), ('ℯ', 'ℱ'), ('ℳ', 'ℸ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('↕', '↙'), ('↜', '↟'),
+  ('↡', '↢'), ('↤', '↥'), ('↧', '↧'), ('↩', '↭'),
+  ('↰', '↱'), ('↶', '↷'), ('↼', '⇍'), ('⇐', '⇑'),
+  ('⇓', '⇓'), ('⇕', '⇛'), ('⇝', '⇝'), ('⇤', '⇥'),
+  ('⌈', '⌋'), ('⎴', '⎵'), ('⎷', '⎷'), ('⏐', '⏐'),
+  ('⏢', '⏢'), ('■', '□'), ('▮', '▶'), ('▼', '◀'),
+  ('◆', '◇'), ('◊', '○'), ('●', '◓'), ('◢', '◢'),
+  ('◤', '◤'), ('◧', '◬'), ('★', '☆'), ('♀', '♀'),
+  ('♂', '♂'), ('♠', '♣'), ('♭', '♮'), ('⟅', '⟆'),
+  ('⟦', '⟯'), ('⦃', '⦘'), ('⧘', '⧛'), ('⧼', '⧽'),
+  ('﹡', '﹡'), ('﹣', '﹣'), ('﹨', '﹨'), ('＼', '＼'),
+  ('＾', '＾'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'),
+];
+
+pub const OTHER_UPPERCASE: &'static [(char, char)] = &[
+  ('Ⅰ', 'Ⅿ'), ('Ⓐ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const PATTERN_SYNTAX: &'static [(char, char)] = &[
+  ('!', '/'), (':', '@'), ('[', '^'), ('`', '`'), ('{', '~'), ('¡', '§'),
+  ('©', '©'), ('«', '¬'), ('®', '®'), ('°', '±'), ('¶', '¶'),
+  ('»', '»'), ('¿', '¿'), ('×', '×'), ('÷', '÷'), ('‐', '‧'),
+  ('‰', '‾'), ('⁁', '⁓'), ('⁕', '⁞'), ('←', '\u{245f}'),
+  ('─', '❵'), ('➔', '\u{2bff}'), ('⸀', '\u{2e7f}'), ('、', '〃'),
+  ('〈', '〠'), ('〰', '〰'), ('﴾', '﴿'), ('﹅', '﹆'),
+];
+
+pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[
+  ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{200e}', '\u{200f}'),
+  ('\u{2028}', '\u{2029}'),
+];
+
+pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[
+  ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'),
+  ('\u{8e2}', '\u{8e2}'), ('\u{110bd}', '\u{110bd}'),
+];
+
+pub const QUOTATION_MARK: &'static [(char, char)] = &[
+  ('\"', '\"'), ('\'', '\''), ('«', '«'), ('»', '»'), ('‘', '‟'),
+  ('‹', '›'), ('⹂', '⹂'), ('「', '』'), ('〝', '〟'),
+  ('﹁', '﹄'), ('＂', '＂'), ('＇', '＇'), ('｢', '｣'),
+];
+
+pub const RADICAL: &'static [(char, char)] = &[
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[
+  ('🇦', '🇿'),
+];
+
+pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[
+  ('!', '!'), ('.', '.'), ('?', '?'), ('։', '։'), ('؟', '؟'),
+  ('۔', '۔'), ('܀', '܂'), ('߹', '߹'), ('।', '॥'), ('၊', '။'),
+  ('።', '።'), ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'),
+  ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'),
+  ('᭚', '᭛'), ('᭞', '᭟'), ('᰻', '᰼'), ('᱾', '᱿'),
+  ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'), ('⸼', '⸼'),
+  ('。', '。'), ('꓿', '꓿'), ('꘎', '꘏'), ('꛳', '꛳'),
+  ('꛷', '꛷'), ('꡶', '꡷'), ('꣎', '꣏'), ('꤯', '꤯'),
+  ('꧈', '꧉'), ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'),
+  ('﹒', '﹒'), ('﹖', '﹗'), ('！', '！'), ('．', '．'),
+  ('？', '？'), ('｡', '｡'), ('𐩖', '𐩗'), ('𑁇', '𑁈'),
+  ('𑂾', '𑃁'), ('𑅁', '𑅃'), ('𑇅', '𑇆'), ('𑇍', '𑇍'),
+  ('𑇞', '𑇟'), ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'),
+  ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), ('𑙁', '𑙂'),
+  ('𑜼', '𑜾'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑱁', '𑱂'),
+  ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'),
+  ('𛲟', '𛲟'), ('𝪈', '𝪈'),
+];
+
+pub const SOFT_DOTTED: &'static [(char, char)] = &[
+  ('i', 'j'), ('į', 'į'), ('ɉ', 'ɉ'), ('ɨ', 'ɨ'), ('ʝ', 'ʝ'),
+  ('ʲ', 'ʲ'), ('ϳ', 'ϳ'), ('і', 'і'), ('ј', 'ј'), ('ᵢ', 'ᵢ'),
+  ('ᶖ', 'ᶖ'), ('ᶤ', 'ᶤ'), ('ᶨ', 'ᶨ'), ('ḭ', 'ḭ'),
+  ('ị', 'ị'), ('ⁱ', 'ⁱ'), ('ⅈ', 'ⅉ'), ('ⱼ', 'ⱼ'),
+  ('𝐢', '𝐣'), ('𝑖', '𝑗'), ('𝒊', '𝒋'), ('𝒾', '𝒿'),
+  ('𝓲', '𝓳'), ('𝔦', '𝔧'), ('𝕚', '𝕛'), ('𝖎', '𝖏'),
+  ('𝗂', '𝗃'), ('𝗶', '𝗷'), ('𝘪', '𝘫'), ('𝙞', '𝙟'),
+  ('𝚒', '𝚓'),
+];
+
+pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '!'), (',', ','), ('.', '.'), (':', ';'), ('?', '?'), (';', ';'),
+  ('·', '·'), ('։', '։'), ('׃', '׃'), ('،', '،'), ('؛', '؛'),
+  ('؟', '؟'), ('۔', '۔'), ('܀', '܊'), ('܌', '܌'), ('߸', '߹'),
+  ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'), ('๚', '๛'),
+  ('༈', '༈'), ('།', '༒'), ('၊', '။'), ('፡', '፨'),
+  ('᙭', '᙮'), ('᛫', '᛭'), ('᜵', '᜶'), ('។', '៖'),
+  ('៚', '៚'), ('᠂', '᠅'), ('᠈', '᠉'), ('᥄', '᥅'),
+  ('᪨', '᪫'), ('᭚', '᭛'), ('᭝', '᭟'), ('᰻', '᰿'),
+  ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'),
+  ('⸼', '⸼'), ('⹁', '⹁'), ('、', '。'), ('꓾', '꓿'),
+  ('꘍', '꘏'), ('꛳', '꛷'), ('꡶', '꡷'), ('꣎', '꣏'),
+  ('꤯', '꤯'), ('꧇', '꧉'), ('꩝', '꩟'), ('꫟', '꫟'),
+  ('꫰', '꫱'), ('꯫', '꯫'), ('﹐', '﹒'), ('﹔', '﹗'),
+  ('！', '！'), ('，', '，'), ('．', '．'), ('：', '；'),
+  ('？', '？'), ('｡', '｡'), ('､', '､'), ('𐎟', '𐎟'),
+  ('𐏐', '𐏐'), ('𐡗', '𐡗'), ('𐤟', '𐤟'), ('𐩖', '𐩗'),
+  ('𐫰', '𐫵'), ('𐬺', '𐬿'), ('𐮙', '𐮜'), ('𑁇', '𑁍'),
+  ('𑂾', '𑃁'), ('𑅁', '𑅃'), ('𑇅', '𑇆'), ('𑇍', '𑇍'),
+  ('𑇞', '𑇟'), ('𑈸', '𑈼'), ('𑊩', '𑊩'), ('𑑋', '𑑍'),
+  ('𑑛', '𑑛'), ('𑗂', '𑗅'), ('𑗉', '𑗗'), ('𑙁', '𑙂'),
+  ('𑜼', '𑜾'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑪡', '𑪢'),
+  ('𑱁', '𑱃'), ('𑱱', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'),
+  ('𖫵', '𖫵'), ('𖬷', '𖬹'), ('𖭄', '𖭄'), ('𛲟', '𛲟'),
+  ('𝪇', '𝪊'),
+];
+
+pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[
+  ('㐀', '䶵'), ('一', '鿪'), ('﨎', '﨏'), ('﨑', '﨑'),
+  ('﨓', '﨔'), ('﨟', '﨟'), ('﨡', '﨡'), ('﨣', '﨤'),
+  ('﨧', '﨩'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+];
+
+pub const UPPERCASE: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('Ĳ', 'Ĳ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('Ǆ', 'Ǆ'),
+  ('Ǉ', 'Ǉ'), ('Ǌ', 'Ǌ'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('Ǳ', 'Ǳ'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('ϒ', 'ϔ'), ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'),
+  ('Ϡ', 'Ϡ'), ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'),
+  ('Ϫ', 'Ϫ'), ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('Ᾰ', 'Ά'),
+  ('Ὲ', 'Ή'), ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'Ώ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℋ', 'ℍ'), ('ℐ', 'ℒ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℰ', 'ℳ'), ('ℾ', 'ℿ'),
+  ('ⅅ', 'ⅅ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'),
+  ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'),
+  ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'), ('Ⱳ', 'Ⱳ'),
+  ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'), ('Ⲅ', 'Ⲅ'),
+  ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'), ('Ⲍ', 'Ⲍ'),
+  ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'), ('Ⲕ', 'Ⲕ'),
+  ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'), ('Ⲝ', 'Ⲝ'),
+  ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'), ('Ⲥ', 'Ⲥ'),
+  ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'), ('Ⲭ', 'Ⲭ'),
+  ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'), ('Ⲵ', 'Ⲵ'),
+  ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'), ('Ⲽ', 'Ⲽ'),
+  ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'), ('Ⳅ', 'Ⳅ'),
+  ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'), ('Ⳍ', 'Ⳍ'),
+  ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'), ('Ⳕ', 'Ⳕ'),
+  ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'), ('Ⳝ', 'Ⳝ'),
+  ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'), ('Ⳬ', 'Ⳬ'),
+  ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'), ('Ꙃ', 'Ꙃ'),
+  ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'), ('Ꙋ', 'Ꙋ'),
+  ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'), ('Ꙓ', 'Ꙓ'),
+  ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'), ('Ꙛ', 'Ꙛ'),
+  ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'), ('Ꙣ', 'Ꙣ'),
+  ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'), ('Ꙫ', 'Ꙫ'),
+  ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'), ('Ꚅ', 'Ꚅ'),
+  ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'), ('Ꚍ', 'Ꚍ'),
+  ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'), ('Ꚕ', 'Ꚕ'),
+  ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'), ('Ꜣ', 'Ꜣ'),
+  ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'), ('Ꜫ', 'Ꜫ'),
+  ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'), ('Ꜵ', 'Ꜵ'),
+  ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'), ('Ꜽ', 'Ꜽ'),
+  ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'), ('Ꝅ', 'Ꝅ'),
+  ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'), ('Ꝍ', 'Ꝍ'),
+  ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'), ('Ꝕ', 'Ꝕ'),
+  ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'), ('Ꝝ', 'Ꝝ'),
+  ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'), ('Ꝥ', 'Ꝥ'),
+  ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'), ('Ꝭ', 'Ꝭ'),
+  ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'), ('Ᵹ', 'Ꝿ'),
+  ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'), ('Ꞇ', 'Ꞇ'),
+  ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'), ('Ꞓ', 'Ꞓ'),
+  ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'), ('Ꞝ', 'Ꞝ'),
+  ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'), ('Ꞥ', 'Ꞥ'),
+  ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'), ('Ʞ', 'Ꞵ'),
+  ('Ꞷ', 'Ꞷ'), ('Ａ', 'Ｚ'), ('𐐀', '𐐧'), ('𐒰', '𐓓'),
+  ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𝐀', '𝐙'), ('𝐴', '𝑍'),
+  ('𝑨', '𝒁'), ('𝒜', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒵'), ('𝓐', '𝓩'),
+  ('𝔄', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔸', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕬', '𝖅'), ('𝖠', '𝖹'), ('𝗔', '𝗭'),
+  ('𝘈', '𝘡'), ('𝘼', '𝙕'), ('𝙰', '𝚉'), ('𝚨', '𝛀'),
+  ('𝛢', '𝛺'), ('𝜜', '𝜴'), ('𝝖', '𝝮'), ('𝞐', '𝞨'),
+  ('𝟊', '𝟊'), ('𞤀', '𞤡'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const VARIATION_SELECTOR: &'static [(char, char)] = &[
+  ('᠋', '᠍'), ('︀', '️'), ('󠄀', '󠇯'),
+];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+  ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{a0}', '\u{a0}'),
+  ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+  ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+  ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+];
+
+pub const XID_CONTINUE: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'),
+  ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'),
+  ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', '҇'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'),
+  ('ׁ', 'ׂ'), ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'),
+  ('ؐ', 'ؚ'), ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'),
+  ('۪', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'),
+  ('ߺ', 'ߺ'), ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'),
+  ('०', '९'), ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'),
+  ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'),
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૯'), ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'),
+  ('ୱ', 'ୱ'), ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'),
+  ('௦', '௯'), ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'),
+  ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'),
+  ('౦', '౯'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'),
+  ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', 'ൎ'), ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'),
+  ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', 'ෳ'), ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'),
+  ('༠', '༩'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'),
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፟'), ('፩', '፱'), ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'),
+  ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'),
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('ក', '៓'), ('ៗ', 'ៗ'), ('ៜ', '៝'),
+  ('០', '៩'), ('᠋', '᠍'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'),
+  ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'),
+  ('ᤰ', '᤻'), ('᥆', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'),
+  ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪽'), ('ᬀ', 'ᭋ'), ('᭐', '᭙'), ('᭫', '᭳'),
+  ('ᮀ', '᯳'), ('ᰀ', '᰷'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('᳐', '᳒'), ('᳔', '᳹'), ('ᴀ', '᷹'),
+  ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('‿', '⁀'),
+  ('⁔', '⁔'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'),
+  ('ⵯ', 'ⵯ'), ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', '゚'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'),
+  ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'),
+  ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), ('Ꙁ', '꙯'), ('ꙴ', '꙽'),
+  ('ꙿ', '꛱'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'),
+  ('꣐', '꣙'), ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('꤀', '꤭'), ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'),
+  ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'),
+  ('꩐', '꩙'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫯ'), ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'),
+  ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'),
+  ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﱝ'), ('ﱤ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷹ'), ('︀', '️'),
+  ('︠', '︯'), ('︳', '︴'), ('﹍', '﹏'), ('ﹱ', 'ﹱ'),
+  ('ﹳ', 'ﹳ'), ('ﹷ', 'ﹷ'), ('ﹹ', 'ﹹ'), ('ﹻ', 'ﹻ'),
+  ('ﹽ', 'ﹽ'), ('ﹿ', 'ﻼ'), ('０', '９'), ('Ａ', 'Ｚ'),
+  ('＿', '＿'), ('ａ', 'ｚ'), ('ｦ', 'ﾾ'), ('ￂ', 'ￇ'),
+  ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'), ('𐇽', '𐇽'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐋠', '𐋠'), ('𐌀', '𐌟'),
+  ('𐌭', '𐍊'), ('𐍐', '𐍺'), ('𐎀', '𐎝'), ('𐎠', '𐏃'),
+  ('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒠', '𐒩'),
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'),
+  ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'),
+  ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'),
+  ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'),
+  ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'),
+  ('𐦾', '𐦿'), ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'),
+  ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫦'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀀', '𑁆'),
+  ('𑁦', '𑁯'), ('𑁿', '𑂺'), ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+  ('𑄀', '𑄴'), ('𑄶', '𑄿'), ('𑅐', '𑅳'), ('𑅶', '𑅶'),
+  ('𑆀', '𑇄'), ('𑇊', '𑇌'), ('𑇐', '𑇚'), ('𑇜', '𑇜'),
+  ('𑈀', '𑈑'), ('𑈓', '𑈷'), ('𑈾', '𑈾'), ('𑊀', '𑊆'),
+  ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'),
+  ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌀', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('𑒀', '𑓅'),
+  ('𑓇', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'), ('𑖸', '𑗀'),
+  ('𑗘', '𑗝'), ('𑘀', '𑙀'), ('𑙄', '𑙄'), ('𑙐', '𑙙'),
+  ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𑜀', '𑜙'), ('𑜝', '𑜫'),
+  ('𑜰', '𑜹'), ('𑢠', '𑣩'), ('𑣿', '𑣿'), ('𑨀', '𑨾'),
+  ('𑩇', '𑩇'), ('𑩐', '𑪃'), ('𑪆', '𑪙'), ('𑫀', '𑫸'),
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱀'), ('𑱐', '𑱙'),
+  ('𑱲', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'), ('𑴀', '𑴆'),
+  ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵇'), ('𑵐', '𑵙'), ('𒀀', '𒎙'), ('𒐀', '𒑮'),
+  ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'),
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖫐', '𖫭'), ('𖫰', '𖫴'),
+  ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭐', '𖭙'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲝', '𛲞'), ('𝅥', '𝅩'), ('𝅭', '𝅲'),
+  ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'),
+  ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'),
+  ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'),
+  ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'), ('𝨀', '𝨶'),
+  ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'), ('𝪛', '𝪟'),
+  ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞠀', '𞣄'), ('𞣐', '𞣖'),
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
+
+pub const XID_START: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'า'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'າ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'),
+  ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'),
+  ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('々', '〇'),
+  ('〡', '〩'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'),
+  ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'),
+  ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'),
+  ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ﬀ', 'ﬆ'), ('ﬓ', 'ﬗ'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﱝ'),
+  ('ﱤ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷹ'),
+  ('ﹱ', 'ﹱ'), ('ﹳ', 'ﹳ'), ('ﹷ', 'ﹷ'), ('ﹹ', 'ﹹ'),
+  ('ﹻ', 'ﹻ'), ('ﹽ', 'ﹽ'), ('ﹿ', 'ﻼ'), ('Ａ', 'Ｚ'),
+  ('ａ', 'ｚ'), ('ｦ', 'ﾝ'), ('ﾠ', 'ﾾ'), ('ￂ', 'ￇ'),
+  ('ￊ', 'ￏ'), ('ￒ', 'ￗ'), ('ￚ', 'ￜ'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍵'),
+  ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'),
+  ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'),
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'),
+  ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨀'), ('𐨐', '𐨓'),
+  ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐩠', '𐩼'), ('𐪀', '𐪜'),
+  ('𐫀', '𐫇'), ('𐫉', '𐫤'), ('𐬀', '𐬵'), ('𐭀', '𐭕'),
+  ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑀃', '𑀷'), ('𑂃', '𑂯'), ('𑃐', '𑃨'),
+  ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'),
+  ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'),
+  ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'),
+  ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'),
+  ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'), ('𑢠', '𑣟'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'),
+  ('𑩐', '𑩐'), ('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑫀', '𑫸'),
+  ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'),
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'),
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖫐', '𖫭'),
+  ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'),
+  ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'), ('𖿠', '𖿡'),
+  ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'),
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'),
+  ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'),
+  ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'),
+  ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'), ('𞤀', '𞥃'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
diff --git a/regex-syntax/src/unicode_tables/property_names.rs b/regex-syntax/src/unicode_tables/property_names.rs
new file mode 100644
index 0000000000..1d1032d337
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/property_names.rs
@@ -0,0 +1,146 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-names tmp/ucd-10.0.0/
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
+  ("age", "Age"), ("ahex", "ASCII_Hex_Digit"), ("alpha", "Alphabetic"),
+  ("alphabetic", "Alphabetic"), ("asciihexdigit", "ASCII_Hex_Digit"),
+  ("bc", "Bidi_Class"), ("bidic", "Bidi_Control"),
+  ("bidiclass", "Bidi_Class"), ("bidicontrol", "Bidi_Control"),
+  ("bidim", "Bidi_Mirrored"), ("bidimirrored", "Bidi_Mirrored"),
+  ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
+  ("bidipairedbracket", "Bidi_Paired_Bracket"),
+  ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), ("blk", "Block"),
+  ("block", "Block"), ("bmg", "Bidi_Mirroring_Glyph"),
+  ("bpb", "Bidi_Paired_Bracket"), ("bpt", "Bidi_Paired_Bracket_Type"),
+  ("c", "ISO_Comment"),
+  ("canonicalcombiningclass", "Canonical_Combining_Class"),
+  ("cased", "Cased"), ("casefolding", "Case_Folding"),
+  ("caseignorable", "Case_Ignorable"), ("ccc", "Canonical_Combining_Class"),
+  ("ce", "Composition_Exclusion"), ("cf", "Case_Folding"),
+  ("changeswhencasefolded", "Changes_When_Casefolded"),
+  ("changeswhencasemapped", "Changes_When_Casemapped"),
+  ("changeswhenlowercased", "Changes_When_Lowercased"),
+  ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
+  ("changeswhentitlecased", "Changes_When_Titlecased"),
+  ("changeswhenuppercased", "Changes_When_Uppercased"),
+  ("ci", "Case_Ignorable"), ("cjkaccountingnumeric", "kAccountingNumeric"),
+  ("cjkcompatibilityvariant", "kCompatibilityVariant"),
+  ("cjkiicore", "kIICore"), ("cjkirggsource", "kIRG_GSource"),
+  ("cjkirghsource", "kIRG_HSource"), ("cjkirgjsource", "kIRG_JSource"),
+  ("cjkirgkpsource", "kIRG_KPSource"), ("cjkirgksource", "kIRG_KSource"),
+  ("cjkirgmsource", "kIRG_MSource"), ("cjkirgtsource", "kIRG_TSource"),
+  ("cjkirgusource", "kIRG_USource"), ("cjkirgvsource", "kIRG_VSource"),
+  ("cjkothernumeric", "kOtherNumeric"),
+  ("cjkprimarynumeric", "kPrimaryNumeric"), ("cjkrsunicode", "kRSUnicode"),
+  ("compex", "Full_Composition_Exclusion"),
+  ("compositionexclusion", "Composition_Exclusion"),
+  ("cwcf", "Changes_When_Casefolded"), ("cwcm", "Changes_When_Casemapped"),
+  ("cwkcf", "Changes_When_NFKC_Casefolded"),
+  ("cwl", "Changes_When_Lowercased"), ("cwt", "Changes_When_Titlecased"),
+  ("cwu", "Changes_When_Uppercased"), ("dash", "Dash"),
+  ("decompositionmapping", "Decomposition_Mapping"),
+  ("decompositiontype", "Decomposition_Type"),
+  ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
+  ("dep", "Deprecated"), ("deprecated", "Deprecated"),
+  ("di", "Default_Ignorable_Code_Point"), ("dia", "Diacritic"),
+  ("diacritic", "Diacritic"), ("dm", "Decomposition_Mapping"),
+  ("dt", "Decomposition_Type"), ("ea", "East_Asian_Width"),
+  ("eastasianwidth", "East_Asian_Width"), ("expandsonnfc", "Expands_On_NFC"),
+  ("expandsonnfd", "Expands_On_NFD"), ("expandsonnfkc", "Expands_On_NFKC"),
+  ("expandsonnfkd", "Expands_On_NFKD"), ("ext", "Extender"),
+  ("extender", "Extender"), ("fcnfkc", "FC_NFKC_Closure"),
+  ("fcnfkcclosure", "FC_NFKC_Closure"),
+  ("fullcompositionexclusion", "Full_Composition_Exclusion"),
+  ("gc", "General_Category"), ("gcb", "Grapheme_Cluster_Break"),
+  ("generalcategory", "General_Category"), ("graphemebase", "Grapheme_Base"),
+  ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
+  ("graphemeextend", "Grapheme_Extend"), ("graphemelink", "Grapheme_Link"),
+  ("grbase", "Grapheme_Base"), ("grext", "Grapheme_Extend"),
+  ("grlink", "Grapheme_Link"), ("hangulsyllabletype", "Hangul_Syllable_Type"),
+  ("hex", "Hex_Digit"), ("hexdigit", "Hex_Digit"),
+  ("hst", "Hangul_Syllable_Type"), ("hyphen", "Hyphen"),
+  ("idc", "ID_Continue"), ("idcontinue", "ID_Continue"),
+  ("ideo", "Ideographic"), ("ideographic", "Ideographic"),
+  ("ids", "ID_Start"), ("idsb", "IDS_Binary_Operator"),
+  ("idsbinaryoperator", "IDS_Binary_Operator"),
+  ("idst", "IDS_Trinary_Operator"), ("idstart", "ID_Start"),
+  ("idstrinaryoperator", "IDS_Trinary_Operator"),
+  ("indicpositionalcategory", "Indic_Positional_Category"),
+  ("indicsyllabiccategory", "Indic_Syllabic_Category"),
+  ("inpc", "Indic_Positional_Category"), ("insc", "Indic_Syllabic_Category"),
+  ("jamoshortname", "Jamo_Short_Name"), ("jg", "Joining_Group"),
+  ("joinc", "Join_Control"), ("joincontrol", "Join_Control"),
+  ("joininggroup", "Joining_Group"), ("joiningtype", "Joining_Type"),
+  ("jsn", "Jamo_Short_Name"), ("jt", "Joining_Type"),
+  ("kaccountingnumeric", "kAccountingNumeric"),
+  ("kcompatibilityvariant", "kCompatibilityVariant"), ("kiicore", "kIICore"),
+  ("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"),
+  ("kirgjsource", "kIRG_JSource"), ("kirgkpsource", "kIRG_KPSource"),
+  ("kirgksource", "kIRG_KSource"), ("kirgmsource", "kIRG_MSource"),
+  ("kirgtsource", "kIRG_TSource"), ("kirgusource", "kIRG_USource"),
+  ("kirgvsource", "kIRG_VSource"), ("kothernumeric", "kOtherNumeric"),
+  ("kprimarynumeric", "kPrimaryNumeric"), ("krsunicode", "kRSUnicode"),
+  ("lb", "Line_Break"), ("lc", "Lowercase_Mapping"),
+  ("linebreak", "Line_Break"), ("loe", "Logical_Order_Exception"),
+  ("logicalorderexception", "Logical_Order_Exception"),
+  ("lower", "Lowercase"), ("lowercase", "Lowercase"),
+  ("lowercasemapping", "Lowercase_Mapping"), ("math", "Math"), ("na", "Name"),
+  ("na1", "Unicode_1_Name"), ("name", "Name"), ("namealias", "Name_Alias"),
+  ("nchar", "Noncharacter_Code_Point"), ("nfcqc", "NFC_Quick_Check"),
+  ("nfcquickcheck", "NFC_Quick_Check"), ("nfdqc", "NFD_Quick_Check"),
+  ("nfdquickcheck", "NFD_Quick_Check"), ("nfkccasefold", "NFKC_Casefold"),
+  ("nfkccf", "NFKC_Casefold"), ("nfkcqc", "NFKC_Quick_Check"),
+  ("nfkcquickcheck", "NFKC_Quick_Check"), ("nfkdqc", "NFKD_Quick_Check"),
+  ("nfkdquickcheck", "NFKD_Quick_Check"),
+  ("noncharactercodepoint", "Noncharacter_Code_Point"),
+  ("nt", "Numeric_Type"), ("numerictype", "Numeric_Type"),
+  ("numericvalue", "Numeric_Value"), ("nv", "Numeric_Value"),
+  ("oalpha", "Other_Alphabetic"), ("ocomment", "ISO_Comment"),
+  ("odi", "Other_Default_Ignorable_Code_Point"),
+  ("ogrext", "Other_Grapheme_Extend"), ("oidc", "Other_ID_Continue"),
+  ("oids", "Other_ID_Start"), ("olower", "Other_Lowercase"),
+  ("omath", "Other_Math"), ("otheralphabetic", "Other_Alphabetic"),
+  ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
+  ("othergraphemeextend", "Other_Grapheme_Extend"),
+  ("otheridcontinue", "Other_ID_Continue"),
+  ("otheridstart", "Other_ID_Start"), ("otherlowercase", "Other_Lowercase"),
+  ("othermath", "Other_Math"), ("otheruppercase", "Other_Uppercase"),
+  ("oupper", "Other_Uppercase"), ("patsyn", "Pattern_Syntax"),
+  ("patternsyntax", "Pattern_Syntax"),
+  ("patternwhitespace", "Pattern_White_Space"),
+  ("patws", "Pattern_White_Space"), ("pcm", "Prepended_Concatenation_Mark"),
+  ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
+  ("qmark", "Quotation_Mark"), ("quotationmark", "Quotation_Mark"),
+  ("radical", "Radical"), ("regionalindicator", "Regional_Indicator"),
+  ("ri", "Regional_Indicator"), ("sb", "Sentence_Break"), ("sc", "Script"),
+  ("scf", "Simple_Case_Folding"), ("script", "Script"),
+  ("scriptextensions", "Script_Extensions"), ("scx", "Script_Extensions"),
+  ("sd", "Soft_Dotted"), ("sentencebreak", "Sentence_Break"),
+  ("sentenceterminal", "Sentence_Terminal"), ("sfc", "Simple_Case_Folding"),
+  ("simplecasefolding", "Simple_Case_Folding"),
+  ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
+  ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
+  ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
+  ("slc", "Simple_Lowercase_Mapping"), ("softdotted", "Soft_Dotted"),
+  ("space", "White_Space"), ("stc", "Simple_Titlecase_Mapping"),
+  ("sterm", "Sentence_Terminal"), ("suc", "Simple_Uppercase_Mapping"),
+  ("tc", "Titlecase_Mapping"), ("term", "Terminal_Punctuation"),
+  ("terminalpunctuation", "Terminal_Punctuation"),
+  ("titlecasemapping", "Titlecase_Mapping"), ("uc", "Uppercase_Mapping"),
+  ("uideo", "Unified_Ideograph"), ("unicode1name", "Unicode_1_Name"),
+  ("unicoderadicalstroke", "kRSUnicode"),
+  ("unifiedideograph", "Unified_Ideograph"), ("upper", "Uppercase"),
+  ("uppercase", "Uppercase"), ("uppercasemapping", "Uppercase_Mapping"),
+  ("urs", "kRSUnicode"), ("variationselector", "Variation_Selector"),
+  ("verticalorientation", "Vertical_Orientation"),
+  ("vo", "Vertical_Orientation"), ("vs", "Variation_Selector"),
+  ("wb", "Word_Break"), ("whitespace", "White_Space"),
+  ("wordbreak", "Word_Break"), ("wspace", "White_Space"),
+  ("xidc", "XID_Continue"), ("xidcontinue", "XID_Continue"),
+  ("xids", "XID_Start"), ("xidstart", "XID_Start"),
+  ("xonfc", "Expands_On_NFC"), ("xonfd", "Expands_On_NFD"),
+  ("xonfkc", "Expands_On_NFKC"), ("xonfkd", "Expands_On_NFKD"),
+];
diff --git a/regex-syntax/src/unicode_tables/property_values.rs b/regex-syntax/src/unicode_tables/property_values.rs
new file mode 100644
index 0000000000..1ce9795b1c
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/property_values.rs
@@ -0,0 +1,277 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-values tmp/ucd-10.0.0 --include gc,script,scx,age
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_VALUES: &'static [(&'static str, &'static [(&'static str, &'static str)])] = &[
+  ("Age", &[("1.1", "V1_1"), ("10.0", "V10_0"), ("2.0", "V2_0"),
+  ("2.1", "V2_1"), ("3.0", "V3_0"), ("3.1", "V3_1"), ("3.2", "V3_2"),
+  ("4.0", "V4_0"), ("4.1", "V4_1"), ("5.0", "V5_0"), ("5.1", "V5_1"),
+  ("5.2", "V5_2"), ("6.0", "V6_0"), ("6.1", "V6_1"), ("6.2", "V6_2"),
+  ("6.3", "V6_3"), ("7.0", "V7_0"), ("8.0", "V8_0"), ("9.0", "V9_0"),
+  ("na", "Unassigned"), ("unassigned", "Unassigned"), ("v100", "V10_0"),
+  ("v11", "V1_1"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"),
+  ("v31", "V3_1"), ("v32", "V3_2"), ("v40", "V4_0"), ("v41", "V4_1"),
+  ("v50", "V5_0"), ("v51", "V5_1"), ("v52", "V5_2"), ("v60", "V6_0"),
+  ("v61", "V6_1"), ("v62", "V6_2"), ("v63", "V6_3"), ("v70", "V7_0"),
+  ("v80", "V8_0"), ("v90", "V9_0"), ]),
+
+  ("General_Category", &[("c", "Other"), ("casedletter", "Cased_Letter"),
+  ("cc", "Control"), ("cf", "Format"),
+  ("closepunctuation", "Close_Punctuation"), ("cn", "Unassigned"),
+  ("cntrl", "Control"), ("co", "Private_Use"), ("combiningmark", "Mark"),
+  ("connectorpunctuation", "Connector_Punctuation"), ("control", "Control"),
+  ("cs", "Surrogate"), ("currencysymbol", "Currency_Symbol"),
+  ("dashpunctuation", "Dash_Punctuation"),
+  ("decimalnumber", "Decimal_Number"), ("digit", "Decimal_Number"),
+  ("enclosingmark", "Enclosing_Mark"),
+  ("finalpunctuation", "Final_Punctuation"), ("format", "Format"),
+  ("initialpunctuation", "Initial_Punctuation"), ("l", "Letter"),
+  ("lc", "Cased_Letter"), ("letter", "Letter"),
+  ("letternumber", "Letter_Number"), ("lineseparator", "Line_Separator"),
+  ("ll", "Lowercase_Letter"), ("lm", "Modifier_Letter"),
+  ("lo", "Other_Letter"), ("lowercaseletter", "Lowercase_Letter"),
+  ("lt", "Titlecase_Letter"), ("lu", "Uppercase_Letter"), ("m", "Mark"),
+  ("mark", "Mark"), ("mathsymbol", "Math_Symbol"), ("mc", "Spacing_Mark"),
+  ("me", "Enclosing_Mark"), ("mn", "Nonspacing_Mark"),
+  ("modifierletter", "Modifier_Letter"),
+  ("modifiersymbol", "Modifier_Symbol"), ("n", "Number"),
+  ("nd", "Decimal_Number"), ("nl", "Letter_Number"), ("no", "Other_Number"),
+  ("nonspacingmark", "Nonspacing_Mark"), ("number", "Number"),
+  ("openpunctuation", "Open_Punctuation"), ("other", "Other"),
+  ("otherletter", "Other_Letter"), ("othernumber", "Other_Number"),
+  ("otherpunctuation", "Other_Punctuation"), ("othersymbol", "Other_Symbol"),
+  ("p", "Punctuation"), ("paragraphseparator", "Paragraph_Separator"),
+  ("pc", "Connector_Punctuation"), ("pd", "Dash_Punctuation"),
+  ("pe", "Close_Punctuation"), ("pf", "Final_Punctuation"),
+  ("pi", "Initial_Punctuation"), ("po", "Other_Punctuation"),
+  ("privateuse", "Private_Use"), ("ps", "Open_Punctuation"),
+  ("punct", "Punctuation"), ("punctuation", "Punctuation"), ("s", "Symbol"),
+  ("sc", "Currency_Symbol"), ("separator", "Separator"),
+  ("sk", "Modifier_Symbol"), ("sm", "Math_Symbol"), ("so", "Other_Symbol"),
+  ("spaceseparator", "Space_Separator"), ("spacingmark", "Spacing_Mark"),
+  ("surrogate", "Surrogate"), ("symbol", "Symbol"),
+  ("titlecaseletter", "Titlecase_Letter"), ("unassigned", "Unassigned"),
+  ("uppercaseletter", "Uppercase_Letter"), ("z", "Separator"),
+  ("zl", "Line_Separator"), ("zp", "Paragraph_Separator"),
+  ("zs", "Space_Separator"), ]),
+
+  ("Script", &[("adlam", "Adlam"), ("adlm", "Adlam"),
+  ("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
+  ("arabic", "Arabic"), ("armenian", "Armenian"),
+  ("armi", "Imperial_Aramaic"), ("armn", "Armenian"), ("avestan", "Avestan"),
+  ("avst", "Avestan"), ("bali", "Balinese"), ("balinese", "Balinese"),
+  ("bamu", "Bamum"), ("bamum", "Bamum"), ("bass", "Bassa_Vah"),
+  ("bassavah", "Bassa_Vah"), ("batak", "Batak"), ("batk", "Batak"),
+  ("beng", "Bengali"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("bhks", "Bhaiksuki"), ("bopo", "Bopomofo"), ("bopomofo", "Bopomofo"),
+  ("brah", "Brahmi"), ("brahmi", "Brahmi"), ("brai", "Braille"),
+  ("braille", "Braille"), ("bugi", "Buginese"), ("buginese", "Buginese"),
+  ("buhd", "Buhid"), ("buhid", "Buhid"), ("cakm", "Chakma"),
+  ("canadianaboriginal", "Canadian_Aboriginal"),
+  ("cans", "Canadian_Aboriginal"), ("cari", "Carian"), ("carian", "Carian"),
+  ("caucasianalbanian", "Caucasian_Albanian"), ("chakma", "Chakma"),
+  ("cham", "Cham"), ("cher", "Cherokee"), ("cherokee", "Cherokee"),
+  ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"),
+  ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"),
+  ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"),
+  ("deva", "Devanagari"), ("devanagari", "Devanagari"), ("dsrt", "Deseret"),
+  ("dupl", "Duployan"), ("duployan", "Duployan"),
+  ("egyp", "Egyptian_Hieroglyphs"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elba", "Elbasan"),
+  ("elbasan", "Elbasan"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"),
+  ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"),
+  ("glagolitic", "Glagolitic"), ("gonm", "Masaram_Gondi"), ("goth", "Gothic"),
+  ("gothic", "Gothic"), ("gran", "Grantha"), ("grantha", "Grantha"),
+  ("greek", "Greek"), ("grek", "Greek"), ("gujarati", "Gujarati"),
+  ("gujr", "Gujarati"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"),
+  ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), ("hani", "Han"),
+  ("hano", "Hanunoo"), ("hanunoo", "Hanunoo"), ("hatr", "Hatran"),
+  ("hatran", "Hatran"), ("hebr", "Hebrew"), ("hebrew", "Hebrew"),
+  ("hira", "Hiragana"), ("hiragana", "Hiragana"),
+  ("hluw", "Anatolian_Hieroglyphs"), ("hmng", "Pahawh_Hmong"),
+  ("hrkt", "Katakana_Or_Hiragana"), ("hung", "Old_Hungarian"),
+  ("imperialaramaic", "Imperial_Aramaic"), ("inherited", "Inherited"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"), ("ital", "Old_Italic"),
+  ("java", "Javanese"), ("javanese", "Javanese"), ("kaithi", "Kaithi"),
+  ("kali", "Kayah_Li"), ("kana", "Katakana"), ("kannada", "Kannada"),
+  ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+  ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"),
+  ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"), ("khmr", "Khmer"),
+  ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"),
+  ("knda", "Kannada"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"),
+  ("lao", "Lao"), ("laoo", "Lao"), ("latin", "Latin"), ("latn", "Latin"),
+  ("lepc", "Lepcha"), ("lepcha", "Lepcha"), ("limb", "Limbu"),
+  ("limbu", "Limbu"), ("lina", "Linear_A"), ("linb", "Linear_B"),
+  ("lineara", "Linear_A"), ("linearb", "Linear_B"), ("lisu", "Lisu"),
+  ("lyci", "Lycian"), ("lycian", "Lycian"), ("lydi", "Lydian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"), ("mahj", "Mahajani"),
+  ("malayalam", "Malayalam"), ("mand", "Mandaic"), ("mandaic", "Mandaic"),
+  ("mani", "Manichaean"), ("manichaean", "Manichaean"), ("marc", "Marchen"),
+  ("marchen", "Marchen"), ("masaramgondi", "Masaram_Gondi"),
+  ("meeteimayek", "Meetei_Mayek"), ("mend", "Mende_Kikakui"),
+  ("mendekikakui", "Mende_Kikakui"), ("merc", "Meroitic_Cursive"),
+  ("mero", "Meroitic_Hieroglyphs"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("mlym", "Malayalam"), ("modi", "Modi"), ("mong", "Mongolian"),
+  ("mongolian", "Mongolian"), ("mro", "Mro"), ("mroo", "Mro"),
+  ("mtei", "Meetei_Mayek"), ("mult", "Multani"), ("multani", "Multani"),
+  ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"),
+  ("narb", "Old_North_Arabian"), ("nbat", "Nabataean"), ("newa", "Newa"),
+  ("newtailue", "New_Tai_Lue"), ("nko", "Nko"), ("nkoo", "Nko"),
+  ("nshu", "Nushu"), ("nushu", "Nushu"), ("ogam", "Ogham"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"), ("olck", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"),
+  ("orya", "Oriya"), ("osage", "Osage"), ("osge", "Osage"),
+  ("osma", "Osmanya"), ("osmanya", "Osmanya"),
+  ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"),
+  ("palmyrene", "Palmyrene"), ("pauc", "Pau_Cin_Hau"),
+  ("paucinhau", "Pau_Cin_Hau"), ("perm", "Old_Permic"), ("phag", "Phags_Pa"),
+  ("phagspa", "Phags_Pa"), ("phli", "Inscriptional_Pahlavi"),
+  ("phlp", "Psalter_Pahlavi"), ("phnx", "Phoenician"),
+  ("phoenician", "Phoenician"), ("plrd", "Miao"),
+  ("prti", "Inscriptional_Parthian"), ("psalterpahlavi", "Psalter_Pahlavi"),
+  ("qaac", "Coptic"), ("qaai", "Inherited"), ("rejang", "Rejang"),
+  ("rjng", "Rejang"), ("runic", "Runic"), ("runr", "Runic"),
+  ("samaritan", "Samaritan"), ("samr", "Samaritan"),
+  ("sarb", "Old_South_Arabian"), ("saur", "Saurashtra"),
+  ("saurashtra", "Saurashtra"), ("sgnw", "SignWriting"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"), ("shaw", "Shavian"),
+  ("shrd", "Sharada"), ("sidd", "Siddham"), ("siddham", "Siddham"),
+  ("signwriting", "SignWriting"), ("sind", "Khudawadi"), ("sinh", "Sinhala"),
+  ("sinhala", "Sinhala"), ("sora", "Sora_Sompeng"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyo", "Soyombo"),
+  ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"),
+  ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"),
+  ("syrc", "Syriac"), ("syriac", "Syriac"), ("tagalog", "Tagalog"),
+  ("tagb", "Tagbanwa"), ("tagbanwa", "Tagbanwa"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"), ("takr", "Takri"),
+  ("takri", "Takri"), ("tale", "Tai_Le"), ("talu", "New_Tai_Lue"),
+  ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"),
+  ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"),
+  ("telugu", "Telugu"), ("tfng", "Tifinagh"), ("tglg", "Tagalog"),
+  ("thaa", "Thaana"), ("thaana", "Thaana"), ("thai", "Thai"),
+  ("tibetan", "Tibetan"), ("tibt", "Tibetan"), ("tifinagh", "Tifinagh"),
+  ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("ugar", "Ugaritic"),
+  ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"),
+  ("vaii", "Vai"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"),
+  ("xpeo", "Old_Persian"), ("xsux", "Cuneiform"), ("yi", "Yi"),
+  ("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
+  ("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
+  ("zzzz", "Unknown"), ]),
+
+  ("Script_Extensions", &[("adlam", "Adlam"), ("adlm", "Adlam"),
+  ("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
+  ("arabic", "Arabic"), ("armenian", "Armenian"),
+  ("armi", "Imperial_Aramaic"), ("armn", "Armenian"), ("avestan", "Avestan"),
+  ("avst", "Avestan"), ("bali", "Balinese"), ("balinese", "Balinese"),
+  ("bamu", "Bamum"), ("bamum", "Bamum"), ("bass", "Bassa_Vah"),
+  ("bassavah", "Bassa_Vah"), ("batak", "Batak"), ("batk", "Batak"),
+  ("beng", "Bengali"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("bhks", "Bhaiksuki"), ("bopo", "Bopomofo"), ("bopomofo", "Bopomofo"),
+  ("brah", "Brahmi"), ("brahmi", "Brahmi"), ("brai", "Braille"),
+  ("braille", "Braille"), ("bugi", "Buginese"), ("buginese", "Buginese"),
+  ("buhd", "Buhid"), ("buhid", "Buhid"), ("cakm", "Chakma"),
+  ("canadianaboriginal", "Canadian_Aboriginal"),
+  ("cans", "Canadian_Aboriginal"), ("cari", "Carian"), ("carian", "Carian"),
+  ("caucasianalbanian", "Caucasian_Albanian"), ("chakma", "Chakma"),
+  ("cham", "Cham"), ("cher", "Cherokee"), ("cherokee", "Cherokee"),
+  ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"),
+  ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"),
+  ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"),
+  ("deva", "Devanagari"), ("devanagari", "Devanagari"), ("dsrt", "Deseret"),
+  ("dupl", "Duployan"), ("duployan", "Duployan"),
+  ("egyp", "Egyptian_Hieroglyphs"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elba", "Elbasan"),
+  ("elbasan", "Elbasan"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"),
+  ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"),
+  ("glagolitic", "Glagolitic"), ("gonm", "Masaram_Gondi"), ("goth", "Gothic"),
+  ("gothic", "Gothic"), ("gran", "Grantha"), ("grantha", "Grantha"),
+  ("greek", "Greek"), ("grek", "Greek"), ("gujarati", "Gujarati"),
+  ("gujr", "Gujarati"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"),
+  ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), ("hani", "Han"),
+  ("hano", "Hanunoo"), ("hanunoo", "Hanunoo"), ("hatr", "Hatran"),
+  ("hatran", "Hatran"), ("hebr", "Hebrew"), ("hebrew", "Hebrew"),
+  ("hira", "Hiragana"), ("hiragana", "Hiragana"),
+  ("hluw", "Anatolian_Hieroglyphs"), ("hmng", "Pahawh_Hmong"),
+  ("hrkt", "Katakana_Or_Hiragana"), ("hung", "Old_Hungarian"),
+  ("imperialaramaic", "Imperial_Aramaic"), ("inherited", "Inherited"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"), ("ital", "Old_Italic"),
+  ("java", "Javanese"), ("javanese", "Javanese"), ("kaithi", "Kaithi"),
+  ("kali", "Kayah_Li"), ("kana", "Katakana"), ("kannada", "Kannada"),
+  ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+  ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"),
+  ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"), ("khmr", "Khmer"),
+  ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"),
+  ("knda", "Kannada"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"),
+  ("lao", "Lao"), ("laoo", "Lao"), ("latin", "Latin"), ("latn", "Latin"),
+  ("lepc", "Lepcha"), ("lepcha", "Lepcha"), ("limb", "Limbu"),
+  ("limbu", "Limbu"), ("lina", "Linear_A"), ("linb", "Linear_B"),
+  ("lineara", "Linear_A"), ("linearb", "Linear_B"), ("lisu", "Lisu"),
+  ("lyci", "Lycian"), ("lycian", "Lycian"), ("lydi", "Lydian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"), ("mahj", "Mahajani"),
+  ("malayalam", "Malayalam"), ("mand", "Mandaic"), ("mandaic", "Mandaic"),
+  ("mani", "Manichaean"), ("manichaean", "Manichaean"), ("marc", "Marchen"),
+  ("marchen", "Marchen"), ("masaramgondi", "Masaram_Gondi"),
+  ("meeteimayek", "Meetei_Mayek"), ("mend", "Mende_Kikakui"),
+  ("mendekikakui", "Mende_Kikakui"), ("merc", "Meroitic_Cursive"),
+  ("mero", "Meroitic_Hieroglyphs"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("mlym", "Malayalam"), ("modi", "Modi"), ("mong", "Mongolian"),
+  ("mongolian", "Mongolian"), ("mro", "Mro"), ("mroo", "Mro"),
+  ("mtei", "Meetei_Mayek"), ("mult", "Multani"), ("multani", "Multani"),
+  ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"),
+  ("narb", "Old_North_Arabian"), ("nbat", "Nabataean"), ("newa", "Newa"),
+  ("newtailue", "New_Tai_Lue"), ("nko", "Nko"), ("nkoo", "Nko"),
+  ("nshu", "Nushu"), ("nushu", "Nushu"), ("ogam", "Ogham"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"), ("olck", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"),
+  ("orya", "Oriya"), ("osage", "Osage"), ("osge", "Osage"),
+  ("osma", "Osmanya"), ("osmanya", "Osmanya"),
+  ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"),
+  ("palmyrene", "Palmyrene"), ("pauc", "Pau_Cin_Hau"),
+  ("paucinhau", "Pau_Cin_Hau"), ("perm", "Old_Permic"), ("phag", "Phags_Pa"),
+  ("phagspa", "Phags_Pa"), ("phli", "Inscriptional_Pahlavi"),
+  ("phlp", "Psalter_Pahlavi"), ("phnx", "Phoenician"),
+  ("phoenician", "Phoenician"), ("plrd", "Miao"),
+  ("prti", "Inscriptional_Parthian"), ("psalterpahlavi", "Psalter_Pahlavi"),
+  ("qaac", "Coptic"), ("qaai", "Inherited"), ("rejang", "Rejang"),
+  ("rjng", "Rejang"), ("runic", "Runic"), ("runr", "Runic"),
+  ("samaritan", "Samaritan"), ("samr", "Samaritan"),
+  ("sarb", "Old_South_Arabian"), ("saur", "Saurashtra"),
+  ("saurashtra", "Saurashtra"), ("sgnw", "SignWriting"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"), ("shaw", "Shavian"),
+  ("shrd", "Sharada"), ("sidd", "Siddham"), ("siddham", "Siddham"),
+  ("signwriting", "SignWriting"), ("sind", "Khudawadi"), ("sinh", "Sinhala"),
+  ("sinhala", "Sinhala"), ("sora", "Sora_Sompeng"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyo", "Soyombo"),
+  ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"),
+  ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"),
+  ("syrc", "Syriac"), ("syriac", "Syriac"), ("tagalog", "Tagalog"),
+  ("tagb", "Tagbanwa"), ("tagbanwa", "Tagbanwa"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"), ("takr", "Takri"),
+  ("takri", "Takri"), ("tale", "Tai_Le"), ("talu", "New_Tai_Lue"),
+  ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"),
+  ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"),
+  ("telugu", "Telugu"), ("tfng", "Tifinagh"), ("tglg", "Tagalog"),
+  ("thaa", "Thaana"), ("thaana", "Thaana"), ("thai", "Thai"),
+  ("tibetan", "Tibetan"), ("tibt", "Tibetan"), ("tifinagh", "Tifinagh"),
+  ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("ugar", "Ugaritic"),
+  ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"),
+  ("vaii", "Vai"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"),
+  ("xpeo", "Old_Persian"), ("xsux", "Cuneiform"), ("yi", "Yi"),
+  ("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
+  ("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
+  ("zzzz", "Unknown"), ]),
+];
diff --git a/regex-syntax/src/unicode_tables/script.rs b/regex-syntax/src/unicode_tables/script.rs
new file mode 100644
index 0000000000..99c5786dea
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/script.rs
@@ -0,0 +1,765 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Adlam", ADLAM), ("Ahom", AHOM),
+  ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), ("Arabic", ARABIC),
+  ("Armenian", ARMENIAN), ("Avestan", AVESTAN), ("Balinese", BALINESE),
+  ("Bamum", BAMUM), ("Bassa_Vah", BASSA_VAH), ("Batak", BATAK),
+  ("Bengali", BENGALI), ("Bhaiksuki", BHAIKSUKI), ("Bopomofo", BOPOMOFO),
+  ("Brahmi", BRAHMI), ("Braille", BRAILLE), ("Buginese", BUGINESE),
+  ("Buhid", BUHID), ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+  ("Carian", CARIAN), ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+  ("Chakma", CHAKMA), ("Cham", CHAM), ("Cherokee", CHEROKEE),
+  ("Common", COMMON), ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM),
+  ("Cypriot", CYPRIOT), ("Cyrillic", CYRILLIC), ("Deseret", DESERET),
+  ("Devanagari", DEVANAGARI), ("Duployan", DUPLOYAN),
+  ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS), ("Elbasan", ELBASAN),
+  ("Ethiopic", ETHIOPIC), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC),
+  ("Gothic", GOTHIC), ("Grantha", GRANTHA), ("Greek", GREEK),
+  ("Gujarati", GUJARATI), ("Gurmukhi", GURMUKHI), ("Han", HAN),
+  ("Hangul", HANGUL), ("Hanunoo", HANUNOO), ("Hatran", HATRAN),
+  ("Hebrew", HEBREW), ("Hiragana", HIRAGANA),
+  ("Imperial_Aramaic", IMPERIAL_ARAMAIC), ("Inherited", INHERITED),
+  ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+  ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), ("Javanese", JAVANESE),
+  ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA),
+  ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khmer", KHMER),
+  ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), ("Lao", LAO),
+  ("Latin", LATIN), ("Lepcha", LEPCHA), ("Limbu", LIMBU),
+  ("Linear_A", LINEAR_A), ("Linear_B", LINEAR_B), ("Lisu", LISU),
+  ("Lycian", LYCIAN), ("Lydian", LYDIAN), ("Mahajani", MAHAJANI),
+  ("Malayalam", MALAYALAM), ("Mandaic", MANDAIC), ("Manichaean", MANICHAEAN),
+  ("Marchen", MARCHEN), ("Masaram_Gondi", MASARAM_GONDI),
+  ("Meetei_Mayek", MEETEI_MAYEK), ("Mende_Kikakui", MENDE_KIKAKUI),
+  ("Meroitic_Cursive", MEROITIC_CURSIVE),
+  ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), ("Miao", MIAO),
+  ("Modi", MODI), ("Mongolian", MONGOLIAN), ("Mro", MRO),
+  ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN),
+  ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), ("Nko", NKO),
+  ("Nushu", NUSHU), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI),
+  ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC),
+  ("Old_North_Arabian", OLD_NORTH_ARABIAN), ("Old_Permic", OLD_PERMIC),
+  ("Old_Persian", OLD_PERSIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+  ("Old_Turkic", OLD_TURKIC), ("Oriya", ORIYA), ("Osage", OSAGE),
+  ("Osmanya", OSMANYA), ("Pahawh_Hmong", PAHAWH_HMONG),
+  ("Palmyrene", PALMYRENE), ("Pau_Cin_Hau", PAU_CIN_HAU),
+  ("Phags_Pa", PHAGS_PA), ("Phoenician", PHOENICIAN),
+  ("Psalter_Pahlavi", PSALTER_PAHLAVI), ("Rejang", REJANG), ("Runic", RUNIC),
+  ("Samaritan", SAMARITAN), ("Saurashtra", SAURASHTRA), ("Sharada", SHARADA),
+  ("Shavian", SHAVIAN), ("Siddham", SIDDHAM), ("SignWriting", SIGNWRITING),
+  ("Sinhala", SINHALA), ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO),
+  ("Sundanese", SUNDANESE), ("Syloti_Nagri", SYLOTI_NAGRI),
+  ("Syriac", SYRIAC), ("Tagalog", TAGALOG), ("Tagbanwa", TAGBANWA),
+  ("Tai_Le", TAI_LE), ("Tai_Tham", TAI_THAM), ("Tai_Viet", TAI_VIET),
+  ("Takri", TAKRI), ("Tamil", TAMIL), ("Tangut", TANGUT), ("Telugu", TELUGU),
+  ("Thaana", THAANA), ("Thai", THAI), ("Tibetan", TIBETAN),
+  ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), ("Ugaritic", UGARITIC),
+  ("Vai", VAI), ("Warang_Citi", WARANG_CITI), ("Yi", YI),
+  ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] = &[
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'),
+];
+
+pub const AHOM: &'static [(char, char)] = &[
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𔐀', '𔙆'),
+];
+
+pub const ARABIC: &'static [(char, char)] = &[
+  ('\u{600}', '\u{604}'), ('؆', '؋'), ('؍', 'ؚ'), ('\u{61c}', '\u{61c}'),
+  ('؞', '؞'), ('ؠ', 'ؿ'), ('ف', 'ي'), ('ٖ', 'ٯ'), ('ٱ', 'ۜ'),
+  ('۞', 'ۿ'), ('ݐ', 'ݿ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ࣿ'), ('ﭐ', '﯁'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('𐹠', '𐹾'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] = &[
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('֊', '֊'), ('֍', '֏'),
+  ('ﬓ', 'ﬗ'),
+];
+
+pub const AVESTAN: &'static [(char, char)] = &[
+  ('𐬀', '𐬵'), ('𐬹', '𐬿'),
+];
+
+pub const BALINESE: &'static [(char, char)] = &[
+  ('ᬀ', 'ᭋ'), ('᭐', '᭼'),
+];
+
+pub const BAMUM: &'static [(char, char)] = &[
+  ('ꚠ', '꛷'), ('𖠀', '𖨸'),
+];
+
+pub const BASSA_VAH: &'static [(char, char)] = &[
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'),
+];
+
+pub const BATAK: &'static [(char, char)] = &[
+  ('ᯀ', '᯳'), ('᯼', '᯿'),
+];
+
+pub const BENGALI: &'static [(char, char)] = &[
+  ('ঀ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৣ'), ('০', '৽'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] = &[
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'), ('𑱐', '𑱬'),
+];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+  ('˪', '˫'), ('ㄅ', 'ㄮ'), ('ㆠ', 'ㆺ'),
+];
+
+pub const BRAHMI: &'static [(char, char)] = &[
+  ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𑁿', '𑁿'),
+];
+
+pub const BRAILLE: &'static [(char, char)] = &[
+  ('⠀', '⣿'),
+];
+
+pub const BUGINESE: &'static [(char, char)] = &[
+  ('ᨀ', 'ᨛ'), ('᨞', '᨟'),
+];
+
+pub const BUHID: &'static [(char, char)] = &[
+  ('ᝀ', 'ᝓ'),
+];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[
+  ('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'),
+];
+
+pub const CARIAN: &'static [(char, char)] = &[
+  ('𐊠', '𐋐'),
+];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[
+  ('𐔰', '𐕣'), ('𐕯', '𐕯'),
+];
+
+pub const CHAKMA: &'static [(char, char)] = &[
+  ('𑄀', '𑄴'), ('𑄶', '𑅃'),
+];
+
+pub const CHAM: &'static [(char, char)] = &[
+  ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟'),
+];
+
+pub const CHEROKEE: &'static [(char, char)] = &[
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ'),
+];
+
+pub const COMMON: &'static [(char, char)] = &[
+  ('\u{0}', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), ('»', '¿'),
+  ('×', '×'), ('÷', '÷'), ('ʹ', '˟'), ('˥', '˩'), ('ˬ', '˿'),
+  ('ʹ', 'ʹ'), (';', ';'), ('΅', '΅'), ('·', '·'), ('։', '։'),
+  ('\u{605}', '\u{605}'), ('،', '،'), ('؛', '؛'), ('؟', '؟'),
+  ('ـ', 'ـ'), ('\u{6dd}', '\u{6dd}'), ('\u{8e2}', '\u{8e2}'),
+  ('।', '॥'), ('฿', '฿'), ('࿕', '࿘'), ('჻', '჻'),
+  ('᛫', '᛭'), ('᜵', '᜶'), ('᠂', '᠃'), ('᠅', '᠅'),
+  ('᳓', '᳓'), ('᳡', '᳡'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳳ'),
+  ('ᳵ', '᳷'), ('\u{2000}', '\u{200b}'), ('\u{200e}', '\u{2064}'),
+  ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), ('₠', '₿'),
+  ('℀', '℥'), ('℧', '℩'), ('ℬ', 'ℱ'), ('ℳ', '⅍'),
+  ('⅏', '⅟'), ('↉', '↋'), ('←', '␦'), ('⑀', '⑊'),
+  ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮘', '⮹'),
+  ('⮽', '⯈'), ('⯊', '⯒'), ('⯬', '⯯'), ('⸀', '⹉'),
+  ('⿰', '⿻'), ('\u{3000}', '〄'), ('〆', '〆'), ('〈', '〠'),
+  ('〰', '〷'), ('〼', '〿'), ('゛', '゜'), ('゠', '゠'),
+  ('・', 'ー'), ('㆐', '㆟'), ('㇀', '㇣'), ('㈠', '㉟'),
+  ('㉿', '㋏'), ('㍘', '㏿'), ('䷀', '䷿'), ('꜀', '꜡'),
+  ('ꞈ', '꞊'), ('꠰', '꠹'), ('꤮', '꤮'), ('ꧏ', 'ꧏ'),
+  ('꭛', '꭛'), ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹒'),
+  ('﹔', '﹦'), ('﹨', '﹫'), ('\u{feff}', '\u{feff}'), ('！', '＠'),
+  ('［', '｀'), ('｛', '･'), ('ｰ', 'ｰ'), ('ﾞ', 'ﾟ'),
+  ('￠', '￦'), ('￨', '￮'), ('\u{fff9}', '�'), ('𐄀', '𐄂'),
+  ('𐄇', '𐄳'), ('𐄷', '𐄿'), ('𐆐', '𐆛'), ('𐇐', '𐇼'),
+  ('𐋡', '𐋻'), ('\u{1bca0}', '\u{1bca3}'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'),
+  ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝌀', '𝍖'), ('𝍠', '𝍱'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'),
+  ('𝟎', '𝟿'), ('🀀', '🀫'), ('🀰', '🂓'), ('🂠', '🂮'),
+  ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), ('🄀', '🄌'),
+  ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'), ('🇦', '🇿'),
+  ('🈁', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'), ('🛰', '🛸'),
+  ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'), ('🠐', '🡇'),
+  ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), ('🤀', '🤋'),
+  ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'), ('🦀', '🦗'),
+  ('🧀', '🧀'), ('🧐', '🧦'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] = &[
+  ('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'),
+];
+
+pub const CUNEIFORM: &'static [(char, char)] = &[
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'),
+];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐠿'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+  ('Ѐ', '҄'), ('҇', 'ԯ'), ('ᲀ', 'ᲈ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'),
+  ('ⷠ', 'ⷿ'), ('Ꙁ', 'ꚟ'), ('︮', '︯'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[
+  ('𐐀', '𐑏'),
+];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+  ('ऀ', 'ॐ'), ('॓', 'ॣ'), ('०', 'ॿ'), ('꣠', 'ꣽ'),
+];
+
+pub const DUPLOYAN: &'static [(char, char)] = &[
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𛲜', '𛲟'),
+];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𓀀', '𓐮'),
+];
+
+pub const ELBASAN: &'static [(char, char)] = &[
+  ('𐔀', '𐔧'),
+];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+  ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፼'), ('ᎀ', '᎙'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ჿ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[
+  ('𐌰', '𐍊'),
+];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+  ('𑌀', '𑌃'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'),
+  ('𑍝', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+  ('Ͱ', 'ͳ'), ('͵', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('΄', '΄'),
+  ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϡ'),
+  ('ϰ', 'Ͽ'), ('ᴦ', 'ᴪ'), ('ᵝ', 'ᵡ'), ('ᵦ', 'ᵪ'),
+  ('ᶿ', 'ᶿ'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ῄ'),
+  ('ῆ', 'ΐ'), ('ῖ', 'Ί'), ('῝', '`'), ('ῲ', 'ῴ'),
+  ('ῶ', '῾'), ('Ω', 'Ω'), ('ꭥ', 'ꭥ'), ('𐅀', '𐆎'),
+  ('𐆠', '𐆠'), ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૱'), ('ૹ', '૿'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('々', '々'),
+  ('〇', '〇'), ('〡', '〩'), ('〸', '〻'), ('㐀', '䶵'),
+  ('一', '鿪'), ('豈', '舘'), ('並', '龎'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+  ('ᄀ', 'ᇿ'), ('〮', '〯'), ('ㄱ', 'ㆎ'), ('㈀', '㈞'),
+  ('㉠', '㉾'), ('ꥠ', 'ꥼ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('ﾠ', 'ﾾ'), ('ￂ', 'ￇ'), ('ￊ', 'ￏ'),
+  ('ￒ', 'ￗ'), ('ￚ', 'ￜ'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[
+  ('ᜠ', '᜴'),
+];
+
+pub const HATRAN: &'static [(char, char)] = &[
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿'),
+];
+
+pub const HEBREW: &'static [(char, char)] = &[
+  ('֑', 'ׇ'), ('א', 'ת'), ('װ', '״'), ('יִ', 'זּ'), ('טּ', 'לּ'),
+  ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﭏ'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+  ('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[
+  ('𐡀', '𐡕'), ('𐡗', '𐡟'),
+];
+
+pub const INHERITED: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҅', '҆'), ('ً', 'ٕ'), ('ٰ', 'ٰ'), ('॑', '॒'),
+  ('᪰', '᪾'), ('᳐', '᳒'), ('᳔', '᳠'), ('᳢', '᳨'),
+  ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'), ('᷀', '᷹'),
+  ('᷻', '᷿'), ('\u{200c}', '\u{200d}'), ('⃐', '⃰'), ('〪', '〭'),
+  ('゙', '゚'), ('︀', '️'), ('︠', '︭'), ('𐇽', '𐇽'),
+  ('𐋠', '𐋠'), ('𝅧', '𝅩'), ('𝅻', '𝆂'), ('𝆅', '𝆋'),
+  ('𝆪', '𝆭'), ('󠄀', '󠇯'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = &[
+  ('𐭠', '𐭲'), ('𐭸', '𐭿'),
+];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = &[
+  ('𐭀', '𐭕'), ('𐭘', '𐭟'),
+];
+
+pub const JAVANESE: &'static [(char, char)] = &[
+  ('ꦀ', '꧍'), ('꧐', '꧙'), ('꧞', '꧟'),
+];
+
+pub const KAITHI: &'static [(char, char)] = &[
+  ('𑂀', '𑃁'),
+];
+
+pub const KANNADA: &'static [(char, char)] = &[
+  ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'),
+  ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'), ('ೠ', 'ೣ'),
+  ('೦', '೯'), ('ೱ', 'ೲ'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+  ('ァ', 'ヺ'), ('ヽ', 'ヿ'), ('ㇰ', 'ㇿ'), ('㋐', '㋾'),
+  ('㌀', '㍗'), ('ｦ', 'ｯ'), ('ｱ', 'ﾝ'), ('𛀀', '𛀀'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[
+  ('꤀', '꤭'), ('꤯', '꤯'),
+];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+  ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐩇'), ('𐩐', '𐩘'),
+];
+
+pub const KHMER: &'static [(char, char)] = &[
+  ('ក', '៝'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿'),
+];
+
+pub const KHOJKI: &'static [(char, char)] = &[
+  ('𑈀', '𑈑'), ('𑈓', '𑈾'),
+];
+
+pub const KHUDAWADI: &'static [(char, char)] = &[
+  ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+];
+
+pub const LAO: &'static [(char, char)] = &[
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('º', 'º'), ('À', 'Ö'),
+  ('Ø', 'ö'), ('ø', 'ʸ'), ('ˠ', 'ˤ'), ('ᴀ', 'ᴥ'), ('ᴬ', 'ᵜ'),
+  ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), ('Ḁ', 'ỿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('K', 'Å'),
+  ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'),
+  ('Ꜣ', 'ꞇ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꟿ'),
+  ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), ('ﬀ', 'ﬆ'), ('Ａ', 'Ｚ'),
+  ('ａ', 'ｚ'),
+];
+
+pub const LEPCHA: &'static [(char, char)] = &[
+  ('ᰀ', '᰷'), ('᰻', '᱉'), ('ᱍ', 'ᱏ'),
+];
+
+pub const LIMBU: &'static [(char, char)] = &[
+  ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('᥀', '᥀'),
+  ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] = &[
+  ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
+];
+
+pub const LISU: &'static [(char, char)] = &[
+  ('ꓐ', '꓿'),
+];
+
+pub const LYCIAN: &'static [(char, char)] = &[
+  ('𐊀', '𐊜'),
+];
+
+pub const LYDIAN: &'static [(char, char)] = &[
+  ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+];
+
+pub const MAHAJANI: &'static [(char, char)] = &[
+  ('𑅐', '𑅶'),
+];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+  ('ഀ', 'ഃ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'),
+  ('െ', 'ൈ'), ('ൊ', '൏'), ('ൔ', 'ൣ'), ('൦', 'ൿ'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[
+  ('ࡀ', '࡛'), ('࡞', '࡞'),
+];
+
+pub const MANICHAEAN: &'static [(char, char)] = &[
+  ('𐫀', '𐫦'), ('𐫫', '𐫶'),
+];
+
+pub const MARCHEN: &'static [(char, char)] = &[
+  ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] = &[
+  ('ꫠ', '꫶'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] = &[
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'),
+];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[
+  ('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿'),
+];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𐦀', '𐦟'),
+];
+
+pub const MIAO: &'static [(char, char)] = &[
+  ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+];
+
+pub const MODI: &'static [(char, char)] = &[
+  ('𑘀', '𑙄'), ('𑙐', '𑙙'),
+];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+  ('᠀', '᠁'), ('᠄', '᠄'), ('᠆', '\u{180e}'), ('᠐', '᠙'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'), ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+];
+
+pub const MULTANI: &'static [(char, char)] = &[
+  ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
+  ('𑊟', '𑊩'),
+];
+
+pub const MYANMAR: &'static [(char, char)] = &[
+  ('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'),
+];
+
+pub const NABATAEAN: &'static [(char, char)] = &[
+  ('𐢀', '𐢞'), ('𐢧', '𐢯'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] = &[
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟'),
+];
+
+pub const NEWA: &'static [(char, char)] = &[
+  ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+];
+
+pub const NKO: &'static [(char, char)] = &[
+  ('߀', 'ߺ'),
+];
+
+pub const NUSHU: &'static [(char, char)] = &[
+  ('𖿡', '𖿡'), ('𛅰', '𛋻'),
+];
+
+pub const OGHAM: &'static [(char, char)] = &[
+  ('\u{1680}', '᚜'),
+];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[
+  ('᱐', '᱿'),
+];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] = &[
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[
+  ('𐌀', '𐌣'), ('𐌭', '𐌯'),
+];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐪀', '𐪟'),
+];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[
+  ('𐍐', '𐍺'),
+];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'),
+];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐩠', '𐩿'),
+];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[
+  ('𐰀', '𐱈'),
+];
+
+pub const ORIYA: &'static [(char, char)] = &[
+  ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'), ('଼', 'ୄ'),
+  ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'),
+  ('ୟ', 'ୣ'), ('୦', '୷'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+];
+
+pub const OSMANYA: &'static [(char, char)] = &[
+  ('𐒀', '𐒝'), ('𐒠', '𐒩'),
+];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] = &[
+  ('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'),
+];
+
+pub const PALMYRENE: &'static [(char, char)] = &[
+  ('𐡠', '𐡿'),
+];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[
+  ('𑫀', '𑫸'),
+];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[
+  ('ꡀ', '꡷'),
+];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[
+  ('𐤀', '𐤛'), ('𐤟', '𐤟'),
+];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[
+  ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+];
+
+pub const REJANG: &'static [(char, char)] = &[
+  ('ꤰ', '꥓'), ('꥟', '꥟'),
+];
+
+pub const RUNIC: &'static [(char, char)] = &[
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+];
+
+pub const SAMARITAN: &'static [(char, char)] = &[
+  ('ࠀ', '࠭'), ('࠰', '࠾'),
+];
+
+pub const SAURASHTRA: &'static [(char, char)] = &[
+  ('ꢀ', 'ꣅ'), ('꣎', '꣙'),
+];
+
+pub const SHARADA: &'static [(char, char)] = &[
+  ('𑆀', '𑇍'), ('𑇐', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[
+  ('𐑐', '𐑿'),
+];
+
+pub const SIDDHAM: &'static [(char, char)] = &[
+  ('𑖀', '𑖵'), ('𑖸', '𑗝'),
+];
+
+pub const SIGNWRITING: &'static [(char, char)] = &[
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'),
+];
+
+pub const SINHALA: &'static [(char, char)] = &[
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'), ('ෲ', '෴'),
+  ('𑇡', '𑇴'),
+];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+];
+
+pub const SOYOMBO: &'static [(char, char)] = &[
+  ('𑩐', '𑪃'), ('𑪆', '𑪜'), ('𑪞', '𑪢'),
+];
+
+pub const SUNDANESE: &'static [(char, char)] = &[
+  ('ᮀ', 'ᮿ'), ('᳀', '᳇'),
+];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[
+  ('ꠀ', '꠫'),
+];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+  ('܀', '܍'), ('\u{70f}', '݊'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ'),
+];
+
+pub const TAGALOG: &'static [(char, char)] = &[
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+];
+
+pub const TAGBANWA: &'static [(char, char)] = &[
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'),
+];
+
+pub const TAI_LE: &'static [(char, char)] = &[
+  ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+  ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'),
+  ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'),
+];
+
+pub const TAKRI: &'static [(char, char)] = &[
+  ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+];
+
+pub const TAMIL: &'static [(char, char)] = &[
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('௦', '௺'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+  ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'),
+  ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', '్'),
+  ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('౦', '౯'),
+  ('౸', '౿'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+  ('ހ', 'ޱ'),
+];
+
+pub const THAI: &'static [(char, char)] = &[
+  ('ก', 'ฺ'), ('เ', '๛'),
+];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+  ('ༀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] = &[
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('⵿', '⵿'),
+];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+  ('𑒀', '𑓇'), ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[
+  ('𐎀', '𐎝'), ('𐎟', '𐎟'),
+];
+
+pub const VAI: &'static [(char, char)] = &[
+  ('ꔀ', 'ꘫ'),
+];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[
+  ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[
+  ('𑨀', '𑩇'),
+];
diff --git a/regex-syntax/src/unicode_tables/script_extension.rs b/regex-syntax/src/unicode_tables/script_extension.rs
new file mode 100644
index 0000000000..10b6c3e03f
--- /dev/null
+++ b/regex-syntax/src/unicode_tables/script_extension.rs
@@ -0,0 +1,785 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script-extension tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Adlam", ADLAM), ("Ahom", AHOM),
+  ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), ("Arabic", ARABIC),
+  ("Armenian", ARMENIAN), ("Avestan", AVESTAN), ("Balinese", BALINESE),
+  ("Bamum", BAMUM), ("Bassa_Vah", BASSA_VAH), ("Batak", BATAK),
+  ("Bengali", BENGALI), ("Bhaiksuki", BHAIKSUKI), ("Bopomofo", BOPOMOFO),
+  ("Brahmi", BRAHMI), ("Braille", BRAILLE), ("Buginese", BUGINESE),
+  ("Buhid", BUHID), ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+  ("Carian", CARIAN), ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+  ("Chakma", CHAKMA), ("Cham", CHAM), ("Cherokee", CHEROKEE),
+  ("Common", COMMON), ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM),
+  ("Cypriot", CYPRIOT), ("Cyrillic", CYRILLIC), ("Deseret", DESERET),
+  ("Devanagari", DEVANAGARI), ("Duployan", DUPLOYAN),
+  ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS), ("Elbasan", ELBASAN),
+  ("Ethiopic", ETHIOPIC), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC),
+  ("Gothic", GOTHIC), ("Grantha", GRANTHA), ("Greek", GREEK),
+  ("Gujarati", GUJARATI), ("Gurmukhi", GURMUKHI), ("Han", HAN),
+  ("Hangul", HANGUL), ("Hanunoo", HANUNOO), ("Hatran", HATRAN),
+  ("Hebrew", HEBREW), ("Hiragana", HIRAGANA),
+  ("Imperial_Aramaic", IMPERIAL_ARAMAIC), ("Inherited", INHERITED),
+  ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+  ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), ("Javanese", JAVANESE),
+  ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA),
+  ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khmer", KHMER),
+  ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), ("Lao", LAO),
+  ("Latin", LATIN), ("Lepcha", LEPCHA), ("Limbu", LIMBU),
+  ("Linear_A", LINEAR_A), ("Linear_B", LINEAR_B), ("Lisu", LISU),
+  ("Lycian", LYCIAN), ("Lydian", LYDIAN), ("Mahajani", MAHAJANI),
+  ("Malayalam", MALAYALAM), ("Mandaic", MANDAIC), ("Manichaean", MANICHAEAN),
+  ("Marchen", MARCHEN), ("Masaram_Gondi", MASARAM_GONDI),
+  ("Meetei_Mayek", MEETEI_MAYEK), ("Mende_Kikakui", MENDE_KIKAKUI),
+  ("Meroitic_Cursive", MEROITIC_CURSIVE),
+  ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), ("Miao", MIAO),
+  ("Modi", MODI), ("Mongolian", MONGOLIAN), ("Mro", MRO),
+  ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN),
+  ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), ("Nko", NKO),
+  ("Nushu", NUSHU), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI),
+  ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC),
+  ("Old_North_Arabian", OLD_NORTH_ARABIAN), ("Old_Permic", OLD_PERMIC),
+  ("Old_Persian", OLD_PERSIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+  ("Old_Turkic", OLD_TURKIC), ("Oriya", ORIYA), ("Osage", OSAGE),
+  ("Osmanya", OSMANYA), ("Pahawh_Hmong", PAHAWH_HMONG),
+  ("Palmyrene", PALMYRENE), ("Pau_Cin_Hau", PAU_CIN_HAU),
+  ("Phags_Pa", PHAGS_PA), ("Phoenician", PHOENICIAN),
+  ("Psalter_Pahlavi", PSALTER_PAHLAVI), ("Rejang", REJANG), ("Runic", RUNIC),
+  ("Samaritan", SAMARITAN), ("Saurashtra", SAURASHTRA), ("Sharada", SHARADA),
+  ("Shavian", SHAVIAN), ("Siddham", SIDDHAM), ("SignWriting", SIGNWRITING),
+  ("Sinhala", SINHALA), ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO),
+  ("Sundanese", SUNDANESE), ("Syloti_Nagri", SYLOTI_NAGRI),
+  ("Syriac", SYRIAC), ("Tagalog", TAGALOG), ("Tagbanwa", TAGBANWA),
+  ("Tai_Le", TAI_LE), ("Tai_Tham", TAI_THAM), ("Tai_Viet", TAI_VIET),
+  ("Takri", TAKRI), ("Tamil", TAMIL), ("Tangut", TANGUT), ("Telugu", TELUGU),
+  ("Thaana", THAANA), ("Thai", THAI), ("Tibetan", TIBETAN),
+  ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), ("Ugaritic", UGARITIC),
+  ("Vai", VAI), ("Warang_Citi", WARANG_CITI), ("Yi", YI),
+  ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'),
+];
+
+pub const AHOM: &'static [(char, char)] = &[
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𔐀', '𔙆'),
+];
+
+pub const ARABIC: &'static [(char, char)] = &[
+  ('\u{600}', '\u{604}'), ('؆', '\u{61c}'), ('؞', 'ۜ'), ('۞', 'ۿ'),
+  ('ݐ', 'ݿ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ࣿ'), ('ﭐ', '﯁'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('𐋠', '𐋻'), ('𐹠', '𐹾'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] = &[
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '֊'), ('֍', '֏'),
+  ('ﬓ', 'ﬗ'),
+];
+
+pub const AVESTAN: &'static [(char, char)] = &[
+  ('𐬀', '𐬵'), ('𐬹', '𐬿'),
+];
+
+pub const BALINESE: &'static [(char, char)] = &[
+  ('ᬀ', 'ᭋ'), ('᭐', '᭼'),
+];
+
+pub const BAMUM: &'static [(char, char)] = &[
+  ('ꚠ', '꛷'), ('𖠀', '𖨸'),
+];
+
+pub const BASSA_VAH: &'static [(char, char)] = &[
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'),
+];
+
+pub const BATAK: &'static [(char, char)] = &[
+  ('ᯀ', '᯳'), ('᯼', '᯿'),
+];
+
+pub const BENGALI: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ঀ', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'),
+  ('ৗ', 'ৗ'), ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', '৽'),
+  ('᳷', '᳷'), ('꣱', '꣱'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] = &[
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'), ('𑱐', '𑱬'),
+];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+  ('˪', '˫'), ('、', '〃'), ('〈', '】'), ('〓', '〟'),
+  ('〪', '〭'), ('〰', '〰'), ('〷', '〷'), ('・', '・'),
+  ('ㄅ', 'ㄮ'), ('ㆠ', 'ㆺ'), ('﹅', '﹆'), ('｡', '･'),
+];
+
+pub const BRAHMI: &'static [(char, char)] = &[
+  ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𑁿', '𑁿'),
+];
+
+pub const BRAILLE: &'static [(char, char)] = &[
+  ('⠀', '⣿'),
+];
+
+pub const BUGINESE: &'static [(char, char)] = &[
+  ('ᨀ', 'ᨛ'), ('᨞', '᨟'), ('ꧏ', 'ꧏ'),
+];
+
+pub const BUHID: &'static [(char, char)] = &[
+  ('᜵', '᜶'), ('ᝀ', 'ᝓ'),
+];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[
+  ('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'),
+];
+
+pub const CARIAN: &'static [(char, char)] = &[
+  ('𐊠', '𐋐'),
+];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[
+  ('𐔰', '𐕣'), ('𐕯', '𐕯'),
+];
+
+pub const CHAKMA: &'static [(char, char)] = &[
+  ('০', '৯'), ('၀', '၉'), ('𑄀', '𑄴'), ('𑄶', '𑅃'),
+];
+
+pub const CHAM: &'static [(char, char)] = &[
+  ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟'),
+];
+
+pub const CHEROKEE: &'static [(char, char)] = &[
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ'),
+];
+
+pub const COMMON: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('\u{0}', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), ('»', '¿'),
+  ('×', '×'), ('÷', '÷'), ('ʹ', '˟'), ('˥', '˩'), ('ˬ', '˿'),
+  ('\u{374}', '\u{374}'), ('\u{37e}', '\u{37e}'), ('΅', '΅'), ('\u{387}', '\u{387}'),
+  ('\u{605}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{8e2}', '\u{8e2}'),
+  ('฿', '฿'), ('࿕', '࿘'), ('᛫', '᛭'), ('\u{2000}', '\u{200b}'),
+  ('\u{200e}', '\u{2064}'), ('\u{2066}', '⁰'), ('⁴', '⁾'),
+  ('₀', '₎'), ('₠', '₿'), ('℀', '℥'), ('℧', '℩'),
+  ('ℬ', 'ℱ'), ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'),
+  ('←', '␦'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('⸀', '⹂'), ('⹄', '⹉'), ('⿰', '⿻'),
+  ('\u{3000}', '\u{3000}'), ('〄', '〄'), ('〒', '〒'), ('〠', '〠'),
+  ('〶', '〶'), ('㉈', '㉟'), ('㉿', '㉿'), ('㊱', '㊿'),
+  ('㋌', '㋏'), ('㍱', '㍺'), ('㎀', '㏟'), ('㏿', '㏿'),
+  ('䷀', '䷿'), ('꜀', '꜡'), ('ꞈ', '꞊'), ('꭛', '꭛'),
+  ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹄'), ('﹇', '﹒'),
+  ('﹔', '﹦'), ('﹨', '﹫'), ('\u{feff}', '\u{feff}'), ('！', '＠'),
+  ('［', '｀'), ('｛', '｠'), ('￠', '￦'), ('￨', '￮'),
+  ('\u{fff9}', '�'), ('𐆐', '𐆛'), ('𐇐', '𐇼'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'),
+  ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝌀', '𝍖'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝟿'),
+  ('🀀', '🀫'), ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'),
+  ('🃁', '🃏'), ('🃑', '🃵'), ('🄀', '🄌'), ('🄐', '🄮'),
+  ('🄰', '🅫'), ('🅰', '🆬'), ('🇦', '🇿'), ('🈁', '🈂'),
+  ('🈐', '🈻'), ('🉀', '🉈'), ('🉠', '🉥'), ('🌀', '🛔'),
+  ('🛠', '🛬'), ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'),
+  ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'),
+  ('🢐', '🢭'), ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'),
+  ('🥐', '🥫'), ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'),
+  ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] = &[
+  ('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'), ('𐋠', '𐋻'),
+];
+
+pub const CUNEIFORM: &'static [(char, char)] = &[
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'),
+];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+  ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐄿'), ('𐠀', '𐠅'),
+  ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'),
+  ('𐠿', '𐠿'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+  ('Ѐ', 'ԯ'), ('ᲀ', 'ᲈ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'),
+  ('ⷠ', 'ⷿ'), ('⹃', '⹃'), ('Ꙁ', 'ꚟ'), ('︮', '︯'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[
+  ('𐐀', '𐑏'),
+];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+  ('ऀ', 'ॿ'), ('᳐', 'ᳶ'), ('᳸', '᳹'), ('⃰', '⃰'),
+  ('꠰', '꠹'), ('꣠', 'ꣽ'),
+];
+
+pub const DUPLOYAN: &'static [(char, char)] = &[
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𛲜', '\u{1bca3}'),
+];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𓀀', '𓐮'),
+];
+
+pub const ELBASAN: &'static [(char, char)] = &[
+  ('𐔀', '𐔧'),
+];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+  ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፼'), ('ᎀ', '᎙'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+  ('։', '։'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('ა', 'ჿ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+  ('҄', '҄'), ('҇', '҇'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('⹃', '⹃'),
+  ('꙯', '꙯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[
+  ('𐌰', '𐍊'),
+];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ப', 'ப'), ('வ', 'வ'),
+  ('௦', '௲'), ('᳐', '᳐'), ('᳒', '᳓'), ('ᳲ', '᳴'),
+  ('᳸', '᳹'), ('⃰', '⃰'), ('𑌀', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('͂', '͂'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'), ('͵', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('΄', '΄'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϡ'), ('ϰ', 'Ͽ'), ('ᴦ', 'ᴪ'), ('ᵝ', 'ᵡ'),
+  ('ᵦ', 'ᵪ'), ('ᶿ', '᷁'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', '\u{1f7d}'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ῄ'), ('ῆ', '\u{1fd3}'), ('ῖ', '\u{1fdb}'), ('῝', '\u{1fef}'),
+  ('ῲ', 'ῴ'), ('ῶ', '῾'), ('\u{2126}', '\u{2126}'), ('ꭥ', 'ꭥ'),
+  ('𐅀', '𐆎'), ('𐆠', '𐆠'), ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ઁ', 'ઃ'), ('અ', 'ઍ'),
+  ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'),
+  ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'), ('ો', '્'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૣ'), ('૦', '૱'), ('ૹ', '૿'),
+  ('꠰', '꠹'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('॑', '॒'), ('।', '॥'), ('ਁ', 'ਃ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', '\u{a33}'),
+  ('ਵ', '\u{a36}'), ('ਸ', 'ਹ'), ('਼', '਼'), ('ਾ', 'ੂ'),
+  ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'), ('\u{a59}', 'ੜ'),
+  ('\u{a5e}', '\u{a5e}'), ('੦', 'ੵ'), ('꠰', '꠹'),
+];
+
+pub const HAN: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('、', '〃'),
+  ('々', '】'), ('〓', '〟'), ('〡', '〭'), ('〰', '〰'),
+  ('〷', '〿'), ('・', '・'), ('㆐', '㆟'), ('㇀', '㇣'),
+  ('㈠', '㉇'), ('㊀', '㊰'), ('㋀', '㋋'), ('㍘', '㍰'),
+  ('㍻', '㍿'), ('㏠', '㏾'), ('㐀', '䶵'), ('一', '鿪'),
+  ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('﹅', '﹆'), ('｡', '･'),
+  ('𝍠', '𝍱'), ('🉐', '🉑'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('\u{2f800}', '\u{2fa1d}'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+  ('ᄀ', 'ᇿ'), ('、', '〃'), ('〈', '】'), ('〓', '〟'),
+  ('〮', '〰'), ('〷', '〷'), ('・', '・'), ('ㄱ', 'ㆎ'),
+  ('㈀', '㈞'), ('㉠', '㉾'), ('ꥠ', 'ꥼ'), ('가', '힣'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('﹅', '﹆'), ('｡', '･'),
+  ('ﾠ', 'ﾾ'), ('ￂ', 'ￇ'), ('ￊ', 'ￏ'), ('ￒ', 'ￗ'),
+  ('ￚ', 'ￜ'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[
+  ('ᜠ', '᜶'),
+];
+
+pub const HATRAN: &'static [(char, char)] = &[
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿'),
+];
+
+pub const HEBREW: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('֑', 'ׇ'), ('א', 'ת'), ('װ', '״'), ('\u{fb1d}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'),
+  ('\u{fb3e}', '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', '\u{fb4f}'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+  ('、', '〃'), ('〈', '】'), ('〓', '〟'), ('〰', '〵'),
+  ('〷', '〷'), ('〼', '〽'), ('ぁ', 'ゖ'), ('゙', '゠'),
+  ('・', 'ー'), ('﹅', '﹆'), ('｡', '･'), ('ｰ', 'ｰ'),
+  ('ﾞ', 'ﾟ'), ('𛀁', '𛄞'), ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[
+  ('𐡀', '𐡕'), ('𐡗', '𐡟'),
+];
+
+pub const INHERITED: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('\u{300}', '\u{341}'), ('\u{343}', '\u{344}'), ('͆', '͢'), ('᪰', '᪾'), ('᷂', '᷹'),
+  ('᷻', '᷿'), ('\u{200c}', '\u{200d}'), ('⃐', '⃯'), ('︀', '️'),
+  ('︠', '︭'), ('𐇽', '𐇽'), ('𝅧', '𝅩'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('󠄀', '󠇯'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = &[
+  ('𐭠', '𐭲'), ('𐭸', '𐭿'),
+];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = &[
+  ('𐭀', '𐭕'), ('𐭘', '𐭟'),
+];
+
+pub const JAVANESE: &'static [(char, char)] = &[
+  ('ꦀ', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟'),
+];
+
+pub const KAITHI: &'static [(char, char)] = &[
+  ('०', '९'), ('꠰', '꠹'), ('𑂀', '𑃁'),
+];
+
+pub const KANNADA: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('಼', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'),
+  ('᳚', '᳚'), ('ᳵ', 'ᳵ'), ('꠰', '꠵'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+  ('、', '〃'), ('〈', '】'), ('〓', '〟'), ('〰', '〵'),
+  ('〷', '〷'), ('〼', '〽'), ('゙', '゜'), ('゠', 'ヿ'),
+  ('ㇰ', 'ㇿ'), ('㋐', '㋾'), ('㌀', '㍗'), ('﹅', '﹆'),
+  ('｡', 'ﾟ'), ('𛀀', '𛀀'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[
+  ('꤀', '꤯'),
+];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+  ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐩇'), ('𐩐', '𐩘'),
+];
+
+pub const KHMER: &'static [(char, char)] = &[
+  ('ក', '៝'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿'),
+];
+
+pub const KHOJKI: &'static [(char, char)] = &[
+  ('૦', '૯'), ('𑈀', '𑈑'), ('𑈓', '𑈾'),
+];
+
+pub const KHUDAWADI: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+];
+
+pub const LAO: &'static [(char, char)] = &[
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('º', 'º'), ('À', 'Ö'),
+  ('Ø', 'ö'), ('ø', 'ʸ'), ('ˠ', 'ˤ'), ('ͣ', 'ͯ'), ('҅', '҆'),
+  ('॑', '॒'), ('჻', '჻'), ('ᴀ', 'ᴥ'), ('ᴬ', 'ᵜ'),
+  ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), ('Ḁ', 'ỿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃰', '⃰'),
+  ('\u{212a}', '\u{212b}'), ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⱡ', 'Ɀ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꟿ'), ('꤮', '꤮'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'),
+  ('ﬀ', 'ﬆ'), ('Ａ', 'Ｚ'), ('ａ', 'ｚ'),
+];
+
+pub const LEPCHA: &'static [(char, char)] = &[
+  ('ᰀ', '᰷'), ('᰻', '᱉'), ('ᱍ', 'ᱏ'),
+];
+
+pub const LIMBU: &'static [(char, char)] = &[
+  ('॥', '॥'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'),
+  ('᥀', '᥀'), ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] = &[
+  ('𐄇', '𐄳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐄀', '𐄂'),
+  ('𐄇', '𐄳'), ('𐄷', '𐄿'),
+];
+
+pub const LISU: &'static [(char, char)] = &[
+  ('ꓐ', '꓿'),
+];
+
+pub const LYCIAN: &'static [(char, char)] = &[
+  ('𐊀', '𐊜'),
+];
+
+pub const LYDIAN: &'static [(char, char)] = &[
+  ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+];
+
+pub const MAHAJANI: &'static [(char, char)] = &[
+  ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶'),
+];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ഀ', 'ഃ'), ('അ', 'ഌ'),
+  ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', '൏'),
+  ('ൔ', 'ൣ'), ('൦', 'ൿ'), ('᳚', '᳚'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('ࡀ', '࡛'), ('࡞', '࡞'),
+];
+
+pub const MANICHAEAN: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𐫀', '𐫦'), ('𐫫', '𐫶'),
+];
+
+pub const MARCHEN: &'static [(char, char)] = &[
+  ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] = &[
+  ('ꫠ', '꫶'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] = &[
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'),
+];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[
+  ('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿'),
+];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𐦀', '𐦟'),
+];
+
+pub const MIAO: &'static [(char, char)] = &[
+  ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+];
+
+pub const MODI: &'static [(char, char)] = &[
+  ('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙'),
+];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+  ('᠀', '\u{180e}'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'),
+  ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+];
+
+pub const MULTANI: &'static [(char, char)] = &[
+  ('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊩'),
+];
+
+pub const MYANMAR: &'static [(char, char)] = &[
+  ('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'),
+];
+
+pub const NABATAEAN: &'static [(char, char)] = &[
+  ('𐢀', '𐢞'), ('𐢧', '𐢯'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] = &[
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟'),
+];
+
+pub const NEWA: &'static [(char, char)] = &[
+  ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+];
+
+pub const NKO: &'static [(char, char)] = &[
+  ('߀', 'ߺ'),
+];
+
+pub const NUSHU: &'static [(char, char)] = &[
+  ('𖿡', '𖿡'), ('𛅰', '𛋻'),
+];
+
+pub const OGHAM: &'static [(char, char)] = &[
+  ('\u{1680}', '᚜'),
+];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[
+  ('᱐', '᱿'),
+];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] = &[
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[
+  ('𐌀', '𐌣'), ('𐌭', '𐌯'),
+];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐪀', '𐪟'),
+];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[
+  ('҃', '҃'), ('𐍐', '𐍺'),
+];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'),
+];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐩠', '𐩿'),
+];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[
+  ('𐰀', '𐱈'),
+];
+
+pub const ORIYA: &'static [(char, char)] = &[ // NFC-unstable endpoints are written as \u{..} escapes
+  ('॑', '॒'), ('।', '॥'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('\u{b5c}', '\u{b5d}'), ('ୟ', 'ୣ'), ('୦', '୷'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+];
+
+pub const OSMANYA: &'static [(char, char)] = &[
+  ('𐒀', '𐒝'), ('𐒠', '𐒩'),
+];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] = &[
+  ('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'),
+];
+
+pub const PALMYRENE: &'static [(char, char)] = &[
+  ('𐡠', '𐡿'),
+];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[
+  ('𑫀', '𑫸'),
+];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[
+  ('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷'),
+];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[
+  ('𐤀', '𐤛'), ('𐤟', '𐤟'),
+];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+];
+
+pub const REJANG: &'static [(char, char)] = &[
+  ('ꤰ', '꥓'), ('꥟', '꥟'),
+];
+
+pub const RUNIC: &'static [(char, char)] = &[
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+];
+
+pub const SAMARITAN: &'static [(char, char)] = &[
+  ('ࠀ', '࠭'), ('࠰', '࠾'),
+];
+
+pub const SAURASHTRA: &'static [(char, char)] = &[
+  ('ꢀ', 'ꣅ'), ('꣎', '꣙'),
+];
+
+pub const SHARADA: &'static [(char, char)] = &[
+  ('॑', '॑'), ('᳗', '᳗'), ('᳙', '᳙'), ('᳜', '᳝'),
+  ('᳠', '᳠'), ('𑆀', '𑇍'), ('𑇐', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[
+  ('𐑐', '𐑿'),
+];
+
+pub const SIDDHAM: &'static [(char, char)] = &[
+  ('𑖀', '𑖵'), ('𑖸', '𑗝'),
+];
+
+pub const SIGNWRITING: &'static [(char, char)] = &[
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'),
+];
+
+pub const SINHALA: &'static [(char, char)] = &[
+  ('।', '॥'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', '෴'), ('𑇡', '𑇴'),
+];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+];
+
+pub const SOYOMBO: &'static [(char, char)] = &[
+  ('𑩐', '𑪃'), ('𑪆', '𑪜'), ('𑪞', '𑪢'),
+];
+
+pub const SUNDANESE: &'static [(char, char)] = &[
+  ('ᮀ', 'ᮿ'), ('᳀', '᳇'),
+];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[
+  ('।', '॥'), ('০', '৯'), ('ꠀ', '꠫'),
+];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+  ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), ('ـ', 'ـ'), ('ً', 'ٕ'),
+  ('ٰ', 'ٰ'), ('܀', '܍'), ('\u{70f}', '݊'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ'),
+];
+
+pub const TAGALOG: &'static [(char, char)] = &[
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'), ('᜵', '᜶'),
+];
+
+pub const TAGBANWA: &'static [(char, char)] = &[
+  ('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'),
+];
+
+pub const TAI_LE: &'static [(char, char)] = &[
+  ('၀', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+  ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'),
+  ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'),
+];
+
+pub const TAKRI: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+];
+
+pub const TAMIL: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ஂ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'),
+  ('ா', 'ூ'), ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'),
+  ('ௗ', 'ௗ'), ('௦', '௺'), ('᳚', '᳚'), ('ꣳ', 'ꣳ'),
+  ('𑌁', '𑌁'), ('𑌃', '𑌃'), ('𑌼', '𑌼'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ఀ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'),
+  ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'),
+  ('ౠ', 'ౣ'), ('౦', '౯'), ('౸', '౿'), ('᳚', '᳚'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+  ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), ('٠', '٩'), ('ހ', 'ޱ'),
+  ('ﷲ', 'ﷲ'), ('﷽', '﷽'),
+];
+
+pub const THAI: &'static [(char, char)] = &[
+  ('ก', 'ฺ'), ('เ', '๛'),
+];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+  ('ༀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] = &[
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('⵿', '⵿'),
+];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑒀', '𑓇'), ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[
+  ('𐎀', '𐎝'), ('𐎟', '𐎟'),
+];
+
+pub const VAI: &'static [(char, char)] = &[
+  ('ꔀ', 'ꘫ'),
+];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[
+  ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+  ('、', '。'), ('〈', '】'), ('〔', '〛'), ('・', '・'),
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'), ('｡', '･'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[
+  ('𑨀', '𑩇'),
+];
diff --git a/scripts/unicode.py b/scripts/unicode.py
deleted file mode 100755
index 015d2018da..0000000000
--- a/scripts/unicode.py
+++ /dev/null
@@ -1,305 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright 2011-2013 The Rust Project Developers. See the COPYRIGHT
-# file at the top-level directory of this distribution and at
-# http://rust-lang.org/COPYRIGHT.
-#
-# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-# option. This file may not be copied, modified, or distributed
-# except according to those terms.
-
-# This script uses the following Unicode tables:
-# - DerivedCoreProperties.txt
-# - EastAsianWidth.txt
-# - PropList.txt
-# - Scripts.txt
-# - UnicodeData.txt
-#
-# Since this should not require frequent updates, we just store this
-# out-of-line and check the unicode.rs file into git.
-
-from collections import defaultdict
-import fileinput, re, os, sys
-
-preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// NOTE: The following code was generated by "scripts/unicode.py", do not edit
-// directly
-
-#![allow(warnings)]
-'''
-
-# Mapping taken from Table 12 from:
-# http://www.unicode.org/reports/tr44/#General_Category_Values
-expanded_categories = {
-    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
-    'Lm': ['L'], 'Lo': ['L'],
-    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
-    'Nd': ['N'], 'Nl': ['N'], 'No': ['No'],
-    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
-    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
-    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
-    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
-    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
-}
-
-def fetch(f):
-    if not os.path.exists(f):
-        os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
-                  % f)
-
-    if not os.path.exists(f):
-        sys.stderr.write("cannot load %s" % f)
-        exit(1)
-
-def is_surrogate(n):
-    return 0xD800 <= n <= 0xDFFF
-
-def load_unicode_data(f):
-    fetch(f)
-    gencats = {}
-
-    udict = {};
-    range_start = -1;
-    for line in fileinput.input(f):
-        data = line.split(';');
-        if len(data) != 15:
-            continue
-        cp = int(data[0], 16);
-        if is_surrogate(cp):
-            continue
-        if range_start >= 0:
-            for i in xrange(range_start, cp):
-                udict[i] = data;
-            range_start = -1;
-        if data[1].endswith(", First>"):
-            range_start = cp;
-            continue;
-        udict[cp] = data;
-
-    for code in udict:
-        [code_org, name, gencat, combine, bidi,
-         decomp, deci, digit, num, mirror,
-         old, iso, upcase, lowcase, titlecase ] = udict[code];
-
-        # place letter in categories as appropriate
-        for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
-            if cat not in gencats:
-                gencats[cat] = []
-            gencats[cat].append(code)
-
-    # generate Not_Assigned from Assigned
-    gencats["Cn"] = gen_unassigned(gencats["Assigned"])
-    # Assigned is not a real category
-    del(gencats["Assigned"])
-    # Other contains Not_Assigned
-    gencats["C"].extend(gencats["Cn"])
-    gencats = group_cats(gencats)
-
-    return gencats
-
-def group_cats(cats):
-    cats_out = {}
-    for cat in cats:
-        cats_out[cat] = group_cat(cats[cat])
-    return cats_out
-
-def group_cat(cat):
-    cat_out = []
-    letters = sorted(set(cat))
-    cur_start = letters.pop(0)
-    cur_end = cur_start
-    for letter in letters:
-        assert letter > cur_end, \
-            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
-        if letter == cur_end + 1:
-            cur_end = letter
-        else:
-            cat_out.append((cur_start, cur_end))
-            cur_start = cur_end = letter
-    cat_out.append((cur_start, cur_end))
-    return cat_out
-
-def ungroup_cat(cat):
-    cat_out = []
-    for (lo, hi) in cat:
-        while lo <= hi:
-            cat_out.append(lo)
-            lo += 1
-    return cat_out
-
-def gen_unassigned(assigned):
-    assigned = set(assigned)
-    return ([i for i in range(0, 0xd800) if i not in assigned] +
-            [i for i in range(0xe000, 0x110000) if i not in assigned])
-
-def format_table_content(f, content, indent):
-    line = " "*indent
-    first = True
-    for chunk in content.split(","):
-        if len(line) + len(chunk) < 78:
-            if first:
-                line += chunk
-            else:
-                line += ", " + chunk
-            first = False
-        else:
-            f.write(line + ",\n")
-            line = " "*indent + chunk
-    f.write(line)
-
-def load_properties(f, interestingprops):
-    fetch(f)
-    props = {}
-    re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
-    re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
-
-    for line in fileinput.input(f):
-        prop = None
-        d_lo = 0
-        d_hi = 0
-        m = re1.match(line)
-        if m:
-            d_lo = m.group(1)
-            d_hi = m.group(1)
-            prop = m.group(2)
-        else:
-            m = re2.match(line)
-            if m:
-                d_lo = m.group(1)
-                d_hi = m.group(2)
-                prop = m.group(3)
-            else:
-                continue
-        if interestingprops and prop not in interestingprops:
-            continue
-        d_lo = int(d_lo, 16)
-        d_hi = int(d_hi, 16)
-        if prop not in props:
-            props[prop] = []
-        props[prop].append((d_lo, d_hi))
-
-    # optimize props if possible
-    for prop in props:
-        props[prop] = group_cat(ungroup_cat(props[prop]))
-
-    return props
-
-def load_case_folding(f):
-    fetch(f)
-    re1 = re.compile("^ *([0-9A-F]+) *; *[CS] *; *([0-9A-F]+) *;")
-    all_pairs = defaultdict(list)
-    for line in fileinput.input(f):
-        m = re1.match(line)
-        if m:
-            a = int(m.group(1), 16)
-            b = int(m.group(2), 16)
-            all_pairs[a].append(b)
-            all_pairs[b].append(a)
-    both = set()
-    for k, vs in all_pairs.iteritems():
-        for v in vs:
-            both.add((k, v))
-            for v2 in all_pairs[v]:
-                both.add((k, v2))
-    c_plus_s_both = sorted((k1, k2) for k1, k2 in both if k1 != k2)
-    return {"C_plus_S_both": c_plus_s_both}
-
-def escape_char(c):
-    return "'\\u{%x}'" % c
-
-def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
-        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))):
-    pub_string = ""
-    if is_pub:
-        pub_string = "pub "
-    f.write("    %sconst %s: %s = &[\n" % (pub_string, name, t_type))
-    data = ""
-    first = True
-    for dat in t_data:
-        if not first:
-            data += ","
-        first = False
-        data += pfun(dat)
-    format_table_content(f, data, 8)
-    f.write("\n    ];\n\n")
-
-def emit_property_module(f, mod, tbl):
-    f.write("pub mod %s {\n" % mod)
-    keys = tbl.keys()
-    keys.sort()
-    for cat in keys:
-        emit_table(f, "%s_table" % cat, tbl[cat])
-    f.write("}\n\n")
-
-def emit_regex_module(f, cats, w_data):
-    f.write("pub mod regex {\n")
-    regex_class = "&'static [(char, char)]"
-    class_table = "&'static [(&'static str, %s)]" % regex_class
-
-    emit_table(f, "UNICODE_CLASSES", cats, class_table,
-        pfun=lambda x: "(\"%s\",super::%s::%s_table)" % (x[0], x[1], x[0]))
-
-    f.write("    pub const PERLD: %s = super::general_category::Nd_table;\n\n"
-            % regex_class)
-    f.write("    pub const PERLS: %s = super::property::White_Space_table;\n\n"
-            % regex_class)
-
-    emit_table(f, "PERLW", w_data, regex_class)
-
-    f.write("}\n\n")
-
-if __name__ == "__main__":
-    r = "unicode.rs"
-    if os.path.exists(r):
-        os.remove(r)
-    with open(r, "w") as rf:
-        # write the file's preamble
-        rf.write(preamble)
-
-        # download and parse all the data
-        fetch("ReadMe.txt")
-        with open("ReadMe.txt") as readme:
-            pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
-            unicode_version = re.search(pattern, readme.read()).groups()
-        gencats = load_unicode_data("UnicodeData.txt")
-        want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]
-        other_derived = ["Default_Ignorable_Code_Point", "Grapheme_Extend"]
-        derived = load_properties("DerivedCoreProperties.txt", want_derived + other_derived)
-        scripts = load_properties("Scripts.txt", [])
-        props = load_properties("PropList.txt",
-                ["White_Space", "Join_Control", "Noncharacter_Code_Point"])
-        case_folding = load_case_folding("CaseFolding.txt")
-
-        # all of these categories will also be available as \p{} in libregex
-        allcats = []
-        for (name, cat) in ("general_category", gencats), \
-                           ("derived_property", derived), \
-                           ("script", scripts), \
-                           ("property", props):
-            emit_property_module(rf, name, cat)
-            allcats.extend(map(lambda x: (x, name), cat))
-        allcats.sort(key=lambda c: c[0])
-
-        # the \w regex corresponds to Alphabetic + Mark + Decimal_Number +
-        # Connector_Punctuation + Join-Control according to UTS#18
-        # http://www.unicode.org/reports/tr18/#Compatibility_Properties
-        perl_words = []
-        for cat in derived["Alphabetic"], gencats["M"], gencats["Nd"], \
-                   gencats["Pc"], props["Join_Control"]:
-            perl_words.extend(ungroup_cat(cat))
-        perl_words = group_cat(perl_words)
-
-        # emit lookup tables for \p{}, along with \d, \w, and \s for libregex
-        emit_regex_module(rf, allcats, perl_words)
-        emit_property_module(rf, "case_folding", case_folding)
diff --git a/src/compile.rs b/src/compile.rs
index b72ba757fa..c765f01416 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -13,10 +13,8 @@ use std::iter;
 use std::result;
 use std::sync::Arc;
 
-use syntax::{
-    Expr, Repeater, CharClass, ClassRange, ByteClass, ByteRange,
-    is_word_byte,
-};
+use syntax::is_word_byte;
+use syntax::hir::{self, Hir};
 use utf8_ranges::{Utf8Range, Utf8Sequence, Utf8Sequences};
 
 use prog::{
@@ -123,7 +121,7 @@ impl Compiler {
     /// stops and returns an error.
     pub fn compile(
         mut self,
-        exprs: &[Expr],
+        exprs: &[Hir],
     ) -> result::Result<Program, Error> {
         debug_assert!(exprs.len() >= 1);
         self.num_exprs = exprs.len();
@@ -134,7 +132,7 @@ impl Compiler {
         }
     }
 
-    fn compile_one(mut self, expr: &Expr) -> result::Result<Program, Error> {
+    fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
         // If we're compiling a forward DFA and we aren't anchored, then
         // add a `.*?` before the first capture group.
         // Other matching engines handle this by baking the logic into the
@@ -161,7 +159,7 @@ impl Compiler {
 
     fn compile_many(
         mut self,
-        exprs: &[Expr],
+        exprs: &[Hir],
     ) -> result::Result<Program, Error> {
         debug_assert!(exprs.len() > 1);
 
@@ -257,97 +255,100 @@ impl Compiler {
     /// method you will see that it does exactly this, though it handles
     /// a list of expressions rather than just the two that we use for
     /// an example.
-    fn c(&mut self, expr: &Expr) -> Result {
+    fn c(&mut self, expr: &Hir) -> Result {
         use prog;
-        use syntax::Expr::*;
+        use syntax::hir::HirKind::*;
 
         try!(self.check_size());
-        match *expr {
+        match *expr.kind() {
             Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
-            Literal { ref chars, casei } => self.c_literal(chars, casei),
-            LiteralBytes { ref bytes, casei } => self.c_bytes(bytes, casei),
-            AnyChar => self.c_class(&[ClassRange {
-                start: '\x00',
-                end: '\u{10ffff}',
-            }]),
-            AnyCharNoNL => {
-                self.c_class(&[
-                    ClassRange { start: '\x00', end: '\x09' },
-                    ClassRange { start: '\x0b', end: '\u{10ffff}' },
-                ])
-            }
-            AnyByte => {
-                self.c_class_bytes(&[ByteRange { start: 0, end: 0xFF }])
+            Literal(hir::Literal::Unicode(c)) => {
+                self.c_literal(&[c])
             }
-            AnyByteNoNL => {
-                self.c_class_bytes(&[
-                    ByteRange { start: 0, end: 0x9 },
-                    ByteRange { start: 0xB, end: 0xFF },
-                ])
+            Literal(hir::Literal::Byte(b)) => {
+                assert!(self.compiled.uses_bytes());
+                self.c_bytes(&[b])
             }
-            Class(ref cls) => {
-                self.c_class(cls)
+            Class(hir::Class::Unicode(ref cls)) => {
+                self.c_class(cls.ranges())
             }
-            ClassBytes(ref cls) => {
-                self.c_class_bytes(cls)
+            Class(hir::Class::Bytes(ref cls)) => {
+                if self.compiled.uses_bytes() {
+                    self.c_class_bytes(cls.ranges())
+                } else {
+                    assert!(cls.is_all_ascii());
+                    let mut char_ranges = vec![];
+                    for r in cls.iter() {
+                        let (s, e) = (r.start() as char, r.end() as char);
+                        char_ranges.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.c_class(&char_ranges)
+                }
             }
-            StartLine if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::EndLine)
             }
-            StartLine => {
+            Anchor(hir::Anchor::StartLine) => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::StartLine)
             }
-            EndLine if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::StartLine)
             }
-            EndLine => {
+            Anchor(hir::Anchor::EndLine) => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::EndLine)
             }
-            StartText if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
                 self.c_empty_look(prog::EmptyLook::EndText)
             }
-            StartText => {
+            Anchor(hir::Anchor::StartText) => {
                 self.c_empty_look(prog::EmptyLook::StartText)
             }
-            EndText if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
                 self.c_empty_look(prog::EmptyLook::StartText)
             }
-            EndText => {
+            Anchor(hir::Anchor::EndText) => {
                 self.c_empty_look(prog::EmptyLook::EndText)
             }
-            WordBoundary => {
+            WordBoundary(hir::WordBoundary::Unicode) => {
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::WordBoundary)
             }
-            NotWordBoundary => {
+            WordBoundary(hir::WordBoundary::UnicodeNegate) => {
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::NotWordBoundary)
             }
-            WordBoundaryAscii => {
+            WordBoundary(hir::WordBoundary::Ascii) => {
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
             }
-            NotWordBoundaryAscii => {
+            WordBoundary(hir::WordBoundary::AsciiNegate) => {
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
             }
-            Group { ref e, i: None, name: None } => self.c(e),
-            Group { ref e, i, ref name } => {
-                // it's impossible to have a named capture without an index
-                let i = i.expect("capture index");
-                if i >= self.compiled.captures.len() {
-                    self.compiled.captures.push(name.clone());
-                    if let Some(ref name) = *name {
-                        self.capture_name_idx.insert(name.to_owned(), i);
+            Group(ref g) => {
+                match g.kind {
+                    hir::GroupKind::NonCapturing => self.c(&g.hir),
+                    hir::GroupKind::CaptureIndex(index) => {
+                        if index as usize >= self.compiled.captures.len() {
+                            self.compiled.captures.push(None);
+                        }
+                        self.c_capture(2 * index as usize, &g.hir)
+                    }
+                    hir::GroupKind::CaptureName { index, ref name } => {
+                        if index as usize >= self.compiled.captures.len() {
+                            let n = name.to_string();
+                            self.compiled.captures.push(Some(n.clone()));
+                            self.capture_name_idx.insert(n, index as usize);
+                        }
+                        self.c_capture(2 * index as usize, &g.hir)
                     }
                 }
-                self.c_capture(2 * i, e)
             }
             Concat(ref es) => {
                 if self.compiled.is_reverse {
@@ -356,12 +357,12 @@ impl Compiler {
                     self.c_concat(es)
                 }
             }
-            Alternate(ref es) => self.c_alternate(&**es),
-            Repeat { ref e, r, greedy } => self.c_repeat(e, r, greedy),
+            Alternation(ref es) => self.c_alternate(&**es),
+            Repetition(ref rep) => self.c_repeat(rep),
         }
     }
 
-    fn c_capture(&mut self, first_slot: usize, expr: &Expr) -> Result {
+    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
         if self.num_exprs > 1 || self.compiled.is_dfa {
             // Don't ever compile Save instructions for regex sets because
             // they are never used. They are also never used in DFA programs
@@ -380,21 +381,21 @@ impl Compiler {
 
     fn c_dotstar(&mut self) -> Result {
         Ok(if !self.compiled.only_utf8() {
-            try!(self.c(&Expr::Repeat {
-                e: Box::new(Expr::AnyByte),
-                r: Repeater::ZeroOrMore,
+            try!(self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
                 greedy: false,
-            }))
+                hir: Box::new(Hir::any(true)),
+            })))
         } else {
-            try!(self.c(&Expr::Repeat {
-                e: Box::new(Expr::AnyChar),
-                r: Repeater::ZeroOrMore,
+            try!(self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
                 greedy: false,
-            }))
+                hir: Box::new(Hir::any(false)),
+            })))
         })
     }
 
-    fn c_literal(&mut self, chars: &[char], casei: bool) -> Result {
+    fn c_literal(&mut self, chars: &[char]) -> Result {
         debug_assert!(!chars.is_empty());
         let mut chars: Box<Iterator<Item=&char>> =
             if self.compiled.is_reverse {
@@ -403,26 +404,20 @@ impl Compiler {
                 Box::new(chars.iter())
             };
         let first = *chars.next().expect("non-empty literal");
-        let Patch { mut hole, entry } = try!(self.c_char(first, casei));
+        let Patch { mut hole, entry } = try!(self.c_char(first));
         for &c in chars {
-            let p = try!(self.c_char(c, casei));
+            let p = try!(self.c_char(c));
             self.fill(hole, p.entry);
             hole = p.hole;
         }
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_char(&mut self, c: char, casei: bool) -> Result {
-        if casei {
-            self.c_class(&CharClass::new(vec![
-                ClassRange { start: c, end: c },
-            ]).case_fold())
-        } else {
-            self.c_class(&[ClassRange { start: c, end: c }])
-        }
+    fn c_char(&mut self, c: char) -> Result {
+        self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
     }
 
-    fn c_class(&mut self, ranges: &[ClassRange]) -> Result {
+    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
         assert!(!ranges.is_empty());
         if self.compiled.uses_bytes() {
             CompileClass {
@@ -431,7 +426,7 @@ impl Compiler {
             }.compile()
         } else {
             let ranges: Vec<(char, char)> =
-                ranges.iter().map(|r| (r.start, r.end)).collect();
+                ranges.iter().map(|r| (r.start(), r.end())).collect();
             let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
                 self.push_hole(InstHole::Char { c: ranges[0].0 })
             } else {
@@ -441,7 +436,7 @@ impl Compiler {
         }
     }
 
-    fn c_bytes(&mut self, bytes: &[u8], casei: bool) -> Result {
+    fn c_bytes(&mut self, bytes: &[u8]) -> Result {
         debug_assert!(!bytes.is_empty());
         let mut bytes: Box<Iterator<Item=&u8>> =
             if self.compiled.is_reverse {
@@ -450,26 +445,20 @@ impl Compiler {
                 Box::new(bytes.iter())
             };
         let first = *bytes.next().expect("non-empty literal");
-        let Patch { mut hole, entry } = try!(self.c_byte(first, casei));
+        let Patch { mut hole, entry } = try!(self.c_byte(first));
         for &b in bytes {
-            let p = try!(self.c_byte(b, casei));
+            let p = try!(self.c_byte(b));
             self.fill(hole, p.entry);
             hole = p.hole;
         }
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_byte(&mut self, b: u8, casei: bool) -> Result {
-        if casei {
-            self.c_class_bytes(&ByteClass::new(vec![
-                ByteRange { start: b, end: b },
-            ]).case_fold())
-        } else {
-            self.c_class_bytes(&[ByteRange { start: b, end: b }])
-        }
+    fn c_byte(&mut self, b: u8) -> Result {
+        self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
     }
 
-    fn c_class_bytes(&mut self, ranges: &[ByteRange]) -> Result {
+    fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
         debug_assert!(!ranges.is_empty());
 
         let first_split_entry = self.insts.len();
@@ -479,17 +468,17 @@ impl Compiler {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
             let next = self.insts.len();
-            self.byte_classes.set_range(r.start, r.end);
+            self.byte_classes.set_range(r.start(), r.end());
             holes.push(self.push_hole(InstHole::Bytes {
-                start: r.start, end: r.end,
+                start: r.start(), end: r.end(),
             }));
             prev_hole = self.fill_split(split, Some(next), None);
         }
         let next = self.insts.len();
         let r = &ranges[ranges.len() - 1];
-        self.byte_classes.set_range(r.start, r.end);
+        self.byte_classes.set_range(r.start(), r.end());
         holes.push(self.push_hole(InstHole::Bytes {
-            start: r.start, end: r.end,
+            start: r.start(), end: r.end(),
         }));
         self.fill(prev_hole, next);
         Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
@@ -501,7 +490,7 @@ impl Compiler {
     }
 
     fn c_concat<'a, I>(&mut self, exprs: I) -> Result
-            where I: IntoIterator<Item=&'a Expr> {
+            where I: IntoIterator<Item=&'a Hir> {
         let mut exprs = exprs.into_iter();
         let first = match exprs.next() {
             Some(expr) => expr,
@@ -518,7 +507,7 @@ impl Compiler {
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_alternate(&mut self, exprs: &[Expr]) -> Result {
+    fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
         debug_assert!(
             exprs.len() >= 2, "alternates must have at least 2 exprs");
 
@@ -533,40 +522,53 @@ impl Compiler {
         for e in &exprs[0..exprs.len() - 1] {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
+            let prev_entry = self.insts.len();
             let Patch { hole, entry } = try!(self.c(e));
+            if prev_entry == self.insts.len() {
+                // TODO(burntsushi): It is kind of silly that we don't support
+                // empty sub-expressions in alternations, but it is supremely
+                // awkward to support them in the existing compiler
+                // infrastructure. This entire compiler needs to be thrown out
+                // anyway, so don't feel too bad.
+                return Err(Error::Syntax(
+                    "alternations cannot currently contain \
+                     empty sub-expressions".to_string()));
+            }
             holes.push(hole);
             prev_hole = self.fill_split(split, Some(entry), None);
         }
+        let prev_entry = self.insts.len();
         let Patch { hole, entry } = try!(self.c(&exprs[exprs.len() - 1]));
+        if prev_entry == self.insts.len() {
+            // TODO(burntsushi): See TODO above.
+            return Err(Error::Syntax(
+                "alternations cannot currently contain \
+                 empty sub-expressions".to_string()));
+        }
         holes.push(hole);
         self.fill(prev_hole, entry);
         Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
     }
 
-    fn c_repeat(
-        &mut self,
-        expr: &Expr,
-        kind: Repeater,
-        greedy: bool,
-    ) -> Result {
-        match kind {
-            Repeater::ZeroOrOne => self.c_repeat_zero_or_one(expr, greedy),
-            Repeater::ZeroOrMore => self.c_repeat_zero_or_more(expr, greedy),
-            Repeater::OneOrMore => self.c_repeat_one_or_more(expr, greedy),
-            Repeater::Range { min, max: None } => {
-                self.c_repeat_range_min_or_more(expr, greedy, min)
+    fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+        use syntax::hir::RepetitionKind::*;
+        match rep.kind {
+            ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
+            ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
+            OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy),
+            Range(hir::RepetitionRange::Exactly(min_max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max)
+            }
+            Range(hir::RepetitionRange::AtLeast(min)) => {
+                self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min)
             }
-            Repeater::Range { min, max: Some(max) } => {
-                self.c_repeat_range(expr, greedy, min, max)
+            Range(hir::RepetitionRange::Bounded(min, max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min, max)
             }
         }
     }
 
-    fn c_repeat_zero_or_one(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
+    fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
         let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
@@ -580,11 +582,7 @@ impl Compiler {
         Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
     }
 
-    fn c_repeat_zero_or_more(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
+    fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
         let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
@@ -598,11 +596,7 @@ impl Compiler {
         Ok(Patch { hole: split_hole, entry: split_entry })
     }
 
-    fn c_repeat_one_or_more(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
+    fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
         let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
         self.fill_to_next(hole_rep);
         let split = self.push_split_hole();
@@ -617,7 +611,7 @@ impl Compiler {
 
     fn c_repeat_range_min_or_more(
         &mut self,
-        expr: &Expr,
+        expr: &Hir,
         greedy: bool,
         min: u32,
     ) -> Result {
@@ -630,7 +624,7 @@ impl Compiler {
 
     fn c_repeat_range(
         &mut self,
-        expr: &Expr,
+        expr: &Hir,
         greedy: bool,
         min: u32,
         max: u32,
@@ -874,7 +868,7 @@ impl InstHole {
 
 struct CompileClass<'a, 'b> {
     c: &'a mut Compiler,
-    ranges: &'b [ClassRange],
+    ranges: &'b [hir::ClassUnicodeRange],
 }
 
 impl<'a, 'b> CompileClass<'a, 'b> {
@@ -887,7 +881,7 @@ impl<'a, 'b> CompileClass<'a, 'b> {
 
         for (i, range) in self.ranges.iter().enumerate() {
             let is_last_range = i + 1 == self.ranges.len();
-            utf8_seqs.reset(range.start, range.end);
+            utf8_seqs.reset(range.start(), range.end());
             let mut it = (&mut utf8_seqs).peekable();
             loop {
                 let utf8_seq = match it.next() {
diff --git a/src/error.rs b/src/error.rs
index 869107bdfa..b02aa3e40d 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -9,11 +9,12 @@
 // except according to those terms.
 
 use std::fmt;
+use std::iter::repeat;
 
 use syntax;
 
 /// An error that occurred during parsing or compiling a regular expression.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, PartialEq)]
 pub enum Error {
     /// A syntax error.
     Syntax(String),
@@ -56,6 +57,34 @@ impl fmt::Display for Error {
     }
 }
 
+// We provide our own Debug implementation so that we show nicer syntax
+// errors when people use `Regex::new(...).unwrap()`. It's a little weird,
+// but the `Syntax` variant is already storing a `String` anyway, so we might
+// as well format it nicely.
+impl fmt::Debug for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => {
+                let hr: String = repeat('~').take(79).collect();
+                try!(writeln!(f, "Syntax("));
+                try!(writeln!(f, "{}", hr));
+                try!(writeln!(f, "{}", err));
+                try!(writeln!(f, "{}", hr));
+                try!(write!(f, ")"));
+                Ok(())
+            }
+            Error::CompiledTooBig(limit) => {
+                f.debug_tuple("CompiledTooBig")
+                    .field(&limit)
+                    .finish()
+            }
+            Error::__Nonexhaustive => {
+                f.debug_tuple("__Nonexhaustive").finish()
+            }
+        }
+    }
+}
+
 impl From<syntax::Error> for Error {
     fn from(err: syntax::Error) -> Error {
         Error::Syntax(err.to_string())
diff --git a/src/exec.rs b/src/exec.rs
index d12a725cf0..51350dd675 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -14,7 +14,9 @@ use std::cmp;
 use std::sync::Arc;
 
 use thread_local::CachedThreadLocal;
-use syntax::{Expr, ExprBuilder, Literals};
+use syntax::ParserBuilder;
+use syntax::hir::Hir;
+use syntax::hir::literal::Literals;
 
 use backtrack;
 use compile::Compiler;
@@ -102,7 +104,7 @@ pub struct ExecBuilder {
 /// Parsed represents a set of parsed regular expressions and their detected
 /// literals.
 struct Parsed {
-    exprs: Vec<Expr>,
+    exprs: Vec<Hir>,
     prefixes: Literals,
     suffixes: Literals,
     bytes: bool,
@@ -214,19 +216,25 @@ impl ExecBuilder {
         // If we're compiling a regex set and that set has any anchored
         // expressions, then disable all literal optimizations.
         for pat in &self.options.pats {
-            let parser =
-                ExprBuilder::new()
+            let mut parser =
+                ParserBuilder::new()
+                    // TODO(burntsushi): Disable octal in regex 1.0. Nobody
+                    // uses it, and we'll get better error messages when
+                    // someone tries to use a backreference. Provide a new
+                    // opt-in toggle for it though.
+                    .octal(true)
                     .case_insensitive(self.options.case_insensitive)
                     .multi_line(self.options.multi_line)
                     .dot_matches_new_line(self.options.dot_matches_new_line)
                     .swap_greed(self.options.swap_greed)
                     .ignore_whitespace(self.options.ignore_whitespace)
                     .unicode(self.options.unicode)
-                    .allow_bytes(!self.only_utf8);
+                    .allow_invalid_utf8(!self.only_utf8)
+                    .build();
             let expr = try!(parser.parse(pat));
-            bytes = bytes || expr.has_bytes();
+            bytes = bytes || !expr.is_always_utf8();
 
-            if !expr.is_anchored_start() && expr.has_anchored_start() {
+            if !expr.is_anchored_start() && expr.is_any_anchored_start() {
                 // Partial anchors unfortunately make it hard to use prefixes,
                 // so disable them.
                 prefixes = None;
@@ -243,14 +251,14 @@ impl ExecBuilder {
                 }
             });
 
-            if !expr.is_anchored_end() && expr.has_anchored_end() {
+            if !expr.is_anchored_end() && expr.is_any_anchored_end() {
                 // Partial anchors unfortunately make it hard to use suffixes,
                 // so disable them.
                 suffixes = None;
             } else if is_set && expr.is_anchored_end() {
                 // Regex sets with anchors do not go well with literal
                 // optimizations.
-                prefixes = None;
+                suffixes = None;
             }
             suffixes = suffixes.and_then(|mut suffixes| {
                 if !suffixes.union_suffixes(&expr) {
diff --git a/src/input.rs b/src/input.rs
index 3d87257c01..e24214954e 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -371,7 +371,7 @@ impl Char {
     ///
     /// If the character is absent, then false is returned.
     pub fn is_word_char(self) -> bool {
-        char::from_u32(self.0).map_or(false, syntax::is_word_char)
+        char::from_u32(self.0).map_or(false, syntax::is_word_character)
     }
 
     /// Returns true iff the byte is a word byte.
diff --git a/src/lib.rs b/src/lib.rs
index 86d9bae6da..31ee2553a2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -217,9 +217,8 @@ This implementation executes regular expressions **only** on valid UTF-8
 while exposing match locations as byte indices into the search string.
 
 Only simple case folding is supported. Namely, when matching
-case-insensitively, the characters are first mapped using the [simple case
-folding](ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt) mapping
-before matching.
+case-insensitively, the characters are first mapped using the "simple" case
+folding rules defined by Unicode.
 
 Regular expressions themselves are **only** interpreted as a sequence of
 Unicode scalar values. This means you can use Unicode characters directly
@@ -248,9 +247,9 @@ are some examples:
   recognize `\n` and not any of the other forms of line terminators defined
   by Unicode.
 
-Finally, Unicode general categories and scripts are available as character
-classes. For example, you can match a sequence of numerals, Greek or
-Cherokee letters:
+Unicode general categories, scripts, script extensions, ages and a smattering
+of boolean properties are available as character classes. For example, you can
+match a sequence of numerals, Greek or Cherokee letters:
 
 ```rust
 # extern crate regex; use regex::Regex;
@@ -261,6 +260,12 @@ assert_eq!((mat.start(), mat.end()), (3, 23));
 # }
 ```
 
+For a more detailed breakdown of Unicode support with respect to
+[UTS#18](http://unicode.org/reports/tr18/),
+please see the
+[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md)
+document in the root of the regex repository.
+
 # Opt out of Unicode support
 
 The `bytes` sub-module provides a `Regex` type that can be used to match
@@ -307,6 +312,8 @@ a separate crate, [`regex-syntax`](../regex_syntax/index.html).
 [x[^xyz]]     Nested/grouping character class (matching any character except y and z)
 [a-y&&xyz]    Intersection (matching x or y)
 [0-9&&[^4]]   Subtraction using intersection and negation (matching 0-9 except 4)
+[0-9--4]      Direct subtraction (matching 0-9 except 4)
+[a-g~~b-h]    Symmetric difference (matching `a` and `h` only)
 [\[\]]        Escaping in character classes (matching [ or ])
 </pre>
 
@@ -431,16 +438,20 @@ assert_eq!(&cap[0], "abc");
 ## Escape sequences
 
 <pre class="rust">
-\*         literal *, works for any punctuation character: \.+*?()|[]{}^$
-\a         bell (\x07)
-\f         form feed (\x0C)
-\t         horizontal tab
-\n         new line
-\r         carriage return
-\v         vertical tab (\x0B)
-\123       octal character code (up to three digits)
-\x7F       hex character code (exactly two digits)
-\x{10FFFF} any hex character code corresponding to a Unicode code point
+\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
+\a          bell (\x07)
+\f          form feed (\x0C)
+\t          horizontal tab
+\n          new line
+\r          carriage return
+\v          vertical tab (\x0B)
+\123        octal character code (up to three digits)
+\x7F        hex character code (exactly two digits)
+\x{10FFFF}  any hex character code corresponding to a Unicode code point
+\u007F      hex character code (exactly four digits)
+\u{7F}      any hex character code corresponding to a Unicode code point
+\U0000007F  hex character code (exactly eight digits)
+\U{7F}      any hex character code corresponding to a Unicode code point
 </pre>
 
 ## Perl character classes (Unicode friendly)
diff --git a/src/literals.rs b/src/literals.rs
index 72c210b858..6f35b57fae 100644
--- a/src/literals.rs
+++ b/src/literals.rs
@@ -12,7 +12,7 @@ use std::mem;
 
 use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
 use memchr::{memchr, memchr2, memchr3};
-use syntax;
+use syntax::hir::literal::{Literal, Literals};
 
 use freqs::BYTE_FREQUENCIES;
 
@@ -42,7 +42,7 @@ enum Matcher {
     /// A single substring, find using Boyer-Moore.
     BoyerMoore(BoyerMooreSearch),
     /// An Aho-Corasick automaton.
-    AC(FullAcAutomaton<syntax::Lit>),
+    AC(FullAcAutomaton<Literal>),
     /// A simd accelerated multiple string matcher. Used only for a small
     /// number of small literals.
     Teddy128(Teddy),
@@ -51,22 +51,22 @@ enum Matcher {
 impl LiteralSearcher {
     /// Returns a matcher that never matches and never advances the input.
     pub fn empty() -> Self {
-        Self::new(syntax::Literals::empty(), Matcher::Empty)
+        Self::new(Literals::empty(), Matcher::Empty)
     }
 
     /// Returns a matcher for literal prefixes from the given set.
-    pub fn prefixes(lits: syntax::Literals) -> Self {
+    pub fn prefixes(lits: Literals) -> Self {
         let matcher = Matcher::prefixes(&lits);
         Self::new(lits, matcher)
     }
 
     /// Returns a matcher for literal suffixes from the given set.
-    pub fn suffixes(lits: syntax::Literals) -> Self {
+    pub fn suffixes(lits: Literals) -> Self {
         let matcher = Matcher::suffixes(&lits);
         Self::new(lits, matcher)
     }
 
-    fn new(lits: syntax::Literals, matcher: Matcher) -> Self {
+    fn new(lits: Literals, matcher: Matcher) -> Self {
         let complete = lits.all_complete();
         LiteralSearcher {
             complete: complete,
@@ -183,17 +183,17 @@ impl LiteralSearcher {
 }
 
 impl Matcher {
-    fn prefixes(lits: &syntax::Literals) -> Self {
+    fn prefixes(lits: &Literals) -> Self {
         let sset = SingleByteSet::prefixes(lits);
         Matcher::new(lits, sset)
     }
 
-    fn suffixes(lits: &syntax::Literals) -> Self {
+    fn suffixes(lits: &Literals) -> Self {
         let sset = SingleByteSet::suffixes(lits);
         Matcher::new(lits, sset)
     }
 
-    fn new(lits: &syntax::Literals, sset: SingleByteSet) -> Self {
+    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
         if lits.literals().is_empty() {
             return Matcher::Empty;
         }
@@ -245,7 +245,7 @@ pub enum LiteralIter<'a> {
     Empty,
     Bytes(&'a [u8]),
     Single(&'a [u8]),
-    AC(&'a [syntax::Lit]),
+    AC(&'a [Literal]),
     Teddy128(&'a [Vec<u8>]),
 }
 
@@ -313,7 +313,7 @@ impl SingleByteSet {
         }
     }
 
-    fn prefixes(lits: &syntax::Literals) -> SingleByteSet {
+    fn prefixes(lits: &Literals) -> SingleByteSet {
         let mut sset = SingleByteSet::new();
         for lit in lits.literals() {
             sset.complete = sset.complete && lit.len() == 1;
@@ -330,7 +330,7 @@ impl SingleByteSet {
         sset
     }
 
-    fn suffixes(lits: &syntax::Literals) -> SingleByteSet {
+    fn suffixes(lits: &Literals) -> SingleByteSet {
         let mut sset = SingleByteSet::new();
         for lit in lits.literals() {
             sset.complete = sset.complete && lit.len() == 1;
diff --git a/src/simd_accel/teddy128.rs b/src/simd_accel/teddy128.rs
index d4d63dc0c5..6a2f3fd8e1 100644
--- a/src/simd_accel/teddy128.rs
+++ b/src/simd_accel/teddy128.rs
@@ -331,7 +331,7 @@ use simd::u8x16;
 use simd::x86::sse2::Sse2Bool8ix16;
 use simd::x86::ssse3::Ssse3U8x16;
 
-use syntax;
+use syntax::hir::literal::Literals;
 
 /// Corresponds to the number of bytes read at a time in the haystack.
 const BLOCK_SIZE: usize = 16;
@@ -388,7 +388,7 @@ impl Teddy {
     ///
     /// If a `Teddy` matcher could not be created (e.g., `pats` is empty or has
     /// an empty substring), then `None` is returned.
-    pub fn new(pats: &syntax::Literals) -> Option<Teddy> {
+    pub fn new(pats: &Literals) -> Option<Teddy> {
         let pats: Vec<_> = pats.literals().iter().map(|p|p.to_vec()).collect();
         let min_len = pats.iter().map(|p| p.len()).min().unwrap_or(0);
         // Don't allow any empty patterns and require that we have at
diff --git a/src/simd_fallback/teddy128.rs b/src/simd_fallback/teddy128.rs
index 8d829b4b90..d7ecad6e1b 100644
--- a/src/simd_fallback/teddy128.rs
+++ b/src/simd_fallback/teddy128.rs
@@ -1,4 +1,4 @@
-use syntax;
+use syntax::hir::literal::Literals;
 
 pub fn is_teddy_128_available() -> bool {
     false
@@ -15,7 +15,7 @@ pub struct Match {
 }
 
 impl Teddy {
-    pub fn new(_pats: &syntax::Literals) -> Option<Teddy> { None }
+    pub fn new(_pats: &Literals) -> Option<Teddy> { None }
     pub fn patterns(&self) -> &[Vec<u8>] { &[] }
     pub fn len(&self) -> usize { 0 }
     pub fn approximate_size(&self) -> usize { 0 }
diff --git a/tests/crazy.rs b/tests/crazy.rs
index c7c5723084..207466c23f 100644
--- a/tests/crazy.rs
+++ b/tests/crazy.rs
@@ -61,6 +61,19 @@ mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
 mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
 mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
 
+// Test that we handle various flavors of empty expressions.
+matiter!(match_empty1, r"", "", (0, 0));
+matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+
 // Test that the DFA can handle pathological cases.
 // (This should result in the DFA's cache being flushed too frequently, which
 // should cause it to quit and fall back to the NFA algorithm.)
diff --git a/tests/noparse.rs b/tests/noparse.rs
index ce2d369e20..62eb5be77e 100644
--- a/tests/noparse.rs
+++ b/tests/noparse.rs
@@ -11,7 +11,6 @@ macro_rules! noparse(
     );
 );
 
-noparse!(fail_double_repeat, "a**");
 noparse!(fail_no_repeat_arg, "*");
 noparse!(fail_incomplete_escape, "\\");
 noparse!(fail_class_incomplete, "[A-");
@@ -23,11 +22,8 @@ noparse!(fail_open_paren, "(");
 noparse!(fail_close_paren, ")");
 noparse!(fail_invalid_range, "[a-Z]");
 noparse!(fail_empty_capture_name, "(?P<>a)");
-noparse!(fail_empty_capture_exp, "(?P<name>)");
 noparse!(fail_bad_capture_name, "(?P<na-me>)");
 noparse!(fail_bad_flag, "(?a)a");
-noparse!(fail_empty_alt_before, "|a");
-noparse!(fail_empty_alt_after, "a|");
 noparse!(fail_too_big, "a{10000000}");
 noparse!(fail_counted_no_close, "a{1001");
 noparse!(fail_unfinished_cap, "(?");
@@ -40,9 +36,15 @@ noparse!(fail_flag_bad, "(?a)");
 noparse!(fail_flag_empty, "(?)");
 noparse!(fail_double_neg, "(?-i-i)");
 noparse!(fail_neg_empty, "(?i-)");
-noparse!(fail_empty_group, "()");
 noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)");
 noparse!(fail_range_end_no_class, "[a-[:lower:]]");
 noparse!(fail_range_end_no_begin, r"[a-\A]");
 noparse!(fail_range_end_no_end, r"[a-\z]");
 noparse!(fail_range_end_no_boundary, r"[a-\b]");
+noparse!(fail_empty_alt1, r"|z");
+noparse!(fail_empty_alt2, r"z|");
+noparse!(fail_empty_alt3, r"|");
+noparse!(fail_empty_alt4, r"||");
+noparse!(fail_empty_alt5, r"()|z");
+noparse!(fail_empty_alt6, r"z|()");
+noparse!(fail_empty_alt7, r"(|)");
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 48e9a95aaf..9b65bc2b5c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -29,3 +29,82 @@ mat!(uni_boundary_none, r"\d\b", "6δ", None);
 mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
 mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
 mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
+
+// Test general categories.
+//
+// We should test more, but there are a lot of them. TODO: write a script to
+// generate more of these tests.
+mat!(uni_class_gencat_cased_letter,
+     r"\p{Cased_Letter}", "Ａ", Some((0, 3)));
+mat!(uni_class_gencat_close_punctuation,
+     r"\p{Close_Punctuation}", "❯", Some((0, 3)));
+mat!(uni_class_gencat_connector_punctuation,
+     r"\p{Connector_Punctuation}", "⁀", Some((0, 3)));
+mat!(uni_class_gencat_control,
+     r"\p{Control}", "\u{9f}", Some((0, 2)));
+mat!(uni_class_gencat_currency_symbol,
+     r"\p{Currency_Symbol}", "￡", Some((0, 3)));
+mat!(uni_class_gencat_dash_punctuation,
+     r"\p{Dash_Punctuation}", "〰", Some((0, 3)));
+mat!(uni_class_gencat_decimal_number,
+     r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
+mat!(uni_class_gencat_enclosing_mark,
+     r"\p{Enclosing_Mark}", "\u{A672}", Some((0, 3)));
+mat!(uni_class_gencat_final_punctuation,
+     r"\p{Final_Punctuation}", "⸡", Some((0, 3)));
+mat!(uni_class_gencat_format,
+     r"\p{Format}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_initial_punctuation,
+     r"\p{Initial_Punctuation}", "⸜", Some((0, 3)));
+mat!(uni_class_gencat_letter,
+     r"\p{Letter}", "Έ", Some((0, 2)));
+mat!(uni_class_gencat_letter_number,
+     r"\p{Letter_Number}", "ↂ", Some((0, 3)));
+mat!(uni_class_gencat_line_separator,
+     r"\p{Line_Separator}", "\u{2028}", Some((0, 3)));
+mat!(uni_class_gencat_lowercase_letter,
+     r"\p{Lowercase_Letter}", "ϛ", Some((0, 2)));
+mat!(uni_class_gencat_mark,
+     r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
+mat!(uni_class_gencat_math,
+     r"\p{Math}", "⋿", Some((0, 3)));
+mat!(uni_class_gencat_modifier_letter,
+     r"\p{Modifier_Letter}", "𖭃", Some((0, 4)));
+mat!(uni_class_gencat_modifier_symbol,
+     r"\p{Modifier_Symbol}", "🏿", Some((0, 4)));
+mat!(uni_class_gencat_nonspacing_mark,
+     r"\p{Nonspacing_Mark}", "\u{1E94A}", Some((0, 4)));
+mat!(uni_class_gencat_number,
+     r"\p{Number}", "⓿", Some((0, 3)));
+mat!(uni_class_gencat_open_punctuation,
+     r"\p{Open_Punctuation}", "｟", Some((0, 3)));
+mat!(uni_class_gencat_other,
+     r"\p{Other}", "\u{bc9}", Some((0, 3)));
+mat!(uni_class_gencat_other_letter,
+     r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
+mat!(uni_class_gencat_other_number,
+     r"\p{Other_Number}", "㉏", Some((0, 3)));
+mat!(uni_class_gencat_other_punctuation,
+     r"\p{Other_Punctuation}", "𞥞", Some((0, 4)));
+mat!(uni_class_gencat_other_symbol,
+     r"\p{Other_Symbol}", "⅌", Some((0, 3)));
+mat!(uni_class_gencat_paragraph_separator,
+     r"\p{Paragraph_Separator}", "\u{2029}", Some((0, 3)));
+mat!(uni_class_gencat_private_use,
+     r"\p{Private_Use}", "\u{10FFFD}", Some((0, 4)));
+mat!(uni_class_gencat_punctuation,
+     r"\p{Punctuation}", "𑁍", Some((0, 4)));
+mat!(uni_class_gencat_separator,
+     r"\p{Separator}", "\u{3000}", Some((0, 3)));
+mat!(uni_class_gencat_space_separator,
+     r"\p{Space_Separator}", "\u{205F}", Some((0, 3)));
+mat!(uni_class_gencat_spacing_mark,
+     r"\p{Spacing_Mark}", "\u{16F7E}", Some((0, 4)));
+mat!(uni_class_gencat_symbol,
+     r"\p{Symbol}", "⯈", Some((0, 3)));
+mat!(uni_class_gencat_titlecase_letter,
+     r"\p{Titlecase_Letter}", "ῼ", Some((0, 3)));
+mat!(uni_class_gencat_unassigned,
+     r"\p{Unassigned}", "\u{10FFFF}", Some((0, 4)));
+mat!(uni_class_gencat_uppercase_letter,
+     r"\p{Uppercase_Letter}", "Ꝋ", Some((0, 3)));