rust-lang
diff --git a/‎.gitignore
+1-8 b/‎.gitignore
+1-8
diff --git a/‎Cargo.lock
+17 b/‎Cargo.lock
+17
diff --git a/‎Cargo.toml
+1 b/‎Cargo.toml
+1
diff --git a/‎src/libcore/char/methods.rs
+8-8 b/‎src/libcore/char/methods.rs
+8-8
diff --git a/‎src/libcore/char/mod.rs
+2-2 b/‎src/libcore/char/mod.rs
+2-2
diff --git a/‎src/libcore/unicode/bool_trie.rs
-66 b/‎src/libcore/unicode/bool_trie.rs
-66
diff --git a/‎src/libcore/unicode/mod.rs
+49-5 b/‎src/libcore/unicode/mod.rs
+49-5
@@ -34,14 +34,7 @@ __pycache__/
 # Created by default with `src/ci/docker/run.sh`:
 /obj/
 /rustllvm/
-/src/libcore/unicode/DerivedCoreProperties.txt
-/src/libcore/unicode/DerivedNormalizationProps.txt
-/src/libcore/unicode/PropList.txt
-/src/libcore/unicode/ReadMe.txt
-/src/libcore/unicode/Scripts.txt
-/src/libcore/unicode/SpecialCasing.txt
-/src/libcore/unicode/UnicodeData.txt
-/src/libcore/unicode/downloaded
+/unicode-downloads
 /target/
 # Generated by compiletest for incremental:
 /tmp/
 
@@ -4953,6 +4953,16 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
 
+[[package]]
+name = "ucd-parse"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6b52bf4da6512f0f07785a04769222e50d29639e7ecd016b7806fd2de306b4"
+dependencies = [
+ "lazy_static 1.3.0",
+ "regex",
+]
+
 [[package]]
 name = "ucd-trie"
 version = "0.1.1"
@@ -4974,6 +4984,13 @@ dependencies = [
  "version_check 0.1.5",
 ]
 
+[[package]]
+name = "unicode-bdd"
+version = "0.1.0"
+dependencies = [
+ "ucd-parse",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.4"
 
@@ -23,6 +23,7 @@ members = [
   "src/tools/rustfmt",
   "src/tools/miri",
   "src/tools/rustdoc-themes",
+  "src/tools/unicode-table-generator",
 ]
 exclude = [
   "build",
 
@@ -3,7 +3,7 @@
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
-use crate::unicode::tables::{conversions, derived_property, general_category, property};
+use crate::unicode::{self, conversions};
 
 use super::*;
 
@@ -552,7 +552,7 @@ impl char {
     pub fn is_alphabetic(self) -> bool {
         match self {
             'a'..='z' | 'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Alphabetic(c),
+            c => c > '\x7f' && unicode::Alphabetic(c),
         }
     }
 
@@ -583,7 +583,7 @@ impl char {
     pub fn is_lowercase(self) -> bool {
         match self {
             'a'..='z' => true,
-            c => c > '\x7f' && derived_property::Lowercase(c),
+            c => c > '\x7f' && unicode::Lowercase(c),
         }
     }
 
@@ -614,7 +614,7 @@ impl char {
     pub fn is_uppercase(self) -> bool {
         match self {
             'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Uppercase(c),
+            c => c > '\x7f' && unicode::Uppercase(c),
         }
     }
 
@@ -642,7 +642,7 @@ impl char {
     pub fn is_whitespace(self) -> bool {
         match self {
             ' ' | '\x09'..='\x0d' => true,
-            c => c > '\x7f' && property::White_Space(c),
+            c => c > '\x7f' && unicode::White_Space(c),
         }
     }
 
@@ -693,7 +693,7 @@ impl char {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn is_control(self) -> bool {
-        general_category::Cc(self)
+        unicode::Cc(self)
     }
 
     /// Returns `true` if this `char` has the `Grapheme_Extend` property.
@@ -707,7 +707,7 @@ impl char {
     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
     #[inline]
     pub(crate) fn is_grapheme_extended(self) -> bool {
-        derived_property::Grapheme_Extend(self)
+        unicode::Grapheme_Extend(self)
     }
 
     /// Returns `true` if this `char` has one of the general categories for numbers.
@@ -739,7 +739,7 @@ impl char {
     pub fn is_numeric(self) -> bool {
         match self {
             '0'..='9' => true,
-            c => c > '\x7f' && general_category::N(c),
+            c => c > '\x7f' && unicode::N(c),
         }
     }
 
 
@@ -37,9 +37,9 @@ pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
 
 // unstable re-exports
 #[unstable(feature = "unicode_version", issue = "49726")]
-pub use crate::unicode::tables::UNICODE_VERSION;
-#[unstable(feature = "unicode_version", issue = "49726")]
 pub use crate::unicode::version::UnicodeVersion;
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub use crate::unicode::UNICODE_VERSION;
 
 use crate::fmt::{self, Write};
 use crate::iter::FusedIterator;
 
@@ -1,15 +1,59 @@
 #![unstable(feature = "unicode_internals", issue = "none")]
 #![allow(missing_docs)]
 
-mod bool_trie;
 pub(crate) mod printable;
-pub(crate) mod tables;
+mod unicode_data;
 pub(crate) mod version;
 
+use version::UnicodeVersion;
+
+/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+/// `char` and `str` methods are based on.
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
+    major: unicode_data::UNICODE_VERSION.0, // first element of the `unicode_data` version tuple
+    minor: unicode_data::UNICODE_VERSION.1, // second element of the same tuple
+    micro: unicode_data::UNICODE_VERSION.2, // third element of the same tuple
+    _priv: (), // unit placeholder; presumably blocks construction outside this crate (confirm in version.rs)
+};
+
 // For use in liballoc, not re-exported in libstd.
 pub mod derived_property {
-    pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
+    pub use super::{Case_Ignorable, Cased}; // re-exports the parent module's `lookup` aliases of the same names
 }
-pub mod conversions {
-    pub use crate::unicode::tables::conversions::{to_lower, to_upper};
+
+pub use unicode_data::alphabetic::lookup as Alphabetic;
+pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
+pub use unicode_data::cased::lookup as Cased;
+pub use unicode_data::cc::lookup as Cc;
+pub use unicode_data::conversions;
+pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
+pub use unicode_data::lowercase::lookup as Lowercase;
+pub use unicode_data::n::lookup as N;
+pub use unicode_data::uppercase::lookup as Uppercase;
+pub use unicode_data::white_space::lookup as White_Space;
+
+#[inline(always)] // range_search: tests whether bit `needle` is set in a two-level, chunk-deduplicated bitset
+fn range_search<const N: usize, const N1: usize, const N2: usize>(
+    needle: u32, // value to test; presumably a `char` cast to u32 (confirm against callers)
+    chunk_idx_map: &[u8; N], // indexed by needle / 1024; each entry picks a row of `bitset_chunk_idx`
+    (last_chunk_idx, last_chunk_mapping): (u16, u8), // the one chunk past the end of `chunk_idx_map` that may still map to a row
+    bitset_chunk_idx: &[[u8; 16]; N1], // each row holds 16 indexes into `bitset`
+    bitset: &[u64; N2], // 64-bit words, one bit per needle value
+) -> bool {
+    let bucket_idx = (needle / 64) as usize; // number of the 64-bit word the needle falls in
+    let chunk_map_idx = bucket_idx / 16; // number of the 16-word chunk (1024 needle values per chunk)
+    let chunk_piece = bucket_idx % 16; // position of the word inside its chunk
+    let chunk_idx = if chunk_map_idx >= N { // chunk lies past the end of `chunk_idx_map`
+        if chunk_map_idx == last_chunk_idx as usize {
+            last_chunk_mapping // the single out-of-range chunk that has a row
+        } else {
+            return false; // every other out-of-range chunk is treated as all zero bits
+        }
+    } else {
+        chunk_idx_map[chunk_map_idx]
+    };
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece]; // panics if `chunk_idx` >= N1; tables are assumed consistent (TODO confirm)
+    let word = bitset[(idx as usize)]; // panics if `idx` >= N2, same assumption
+    (word & (1 << (needle % 64) as u64)) != 0 // `as` binds tighter than `<<`: tests bit (needle % 64) of `word`
 }
Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@ members = [`
`23`	`23`	`"src/tools/rustfmt",`
`24`	`24`	`"src/tools/miri",`
`25`	`25`	`"src/tools/rustdoc-themes",`
	`26`	`+ "src/tools/unicode-table-generator",`
`26`	`27`	`]`
`27`	`28`	`exclude = [`
`28`	`29`	`"build",`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`use crate::slice;`
`4`	`4`	`use crate::str::from_utf8_unchecked_mut;`
`5`	`5`	`use crate::unicode::printable::is_printable;`
`6`		`-use crate::unicode::tables::{conversions, derived_property, general_category, property};`
	`6`	`+use crate::unicode::{self, conversions};`
`7`	`7`
`8`	`8`	`use super::*;`
`9`	`9`
`@@ -552,7 +552,7 @@ impl char {`
`552`	`552`	`pub fn is_alphabetic(self) -> bool {`
`553`	`553`	`match self {`
`554`	`554`	`'a'..='z' \| 'A'..='Z' => true,`
`555`		`- c => c > '\x7f' && derived_property::Alphabetic(c),`
	`555`	`+ c => c > '\x7f' && unicode::Alphabetic(c),`
`556`	`556`	`}`
`557`	`557`	`}`
`558`	`558`
`@@ -583,7 +583,7 @@ impl char {`
`583`	`583`	`pub fn is_lowercase(self) -> bool {`
`584`	`584`	`match self {`
`585`	`585`	`'a'..='z' => true,`
`586`		`- c => c > '\x7f' && derived_property::Lowercase(c),`
	`586`	`+ c => c > '\x7f' && unicode::Lowercase(c),`
`587`	`587`	`}`
`588`	`588`	`}`
`589`	`589`
`@@ -614,7 +614,7 @@ impl char {`
`614`	`614`	`pub fn is_uppercase(self) -> bool {`
`615`	`615`	`match self {`
`616`	`616`	`'A'..='Z' => true,`
`617`		`- c => c > '\x7f' && derived_property::Uppercase(c),`
	`617`	`+ c => c > '\x7f' && unicode::Uppercase(c),`
`618`	`618`	`}`
`619`	`619`	`}`
`620`	`620`
`@@ -642,7 +642,7 @@ impl char {`
`642`	`642`	`pub fn is_whitespace(self) -> bool {`
`643`	`643`	`match self {`
`644`	`644`	`' ' \| '\x09'..='\x0d' => true,`
`645`		`- c => c > '\x7f' && property::White_Space(c),`
	`645`	`+ c => c > '\x7f' && unicode::White_Space(c),`
`646`	`646`	`}`
`647`	`647`	`}`
`648`	`648`
`@@ -693,7 +693,7 @@ impl char {`
`693`	`693`	`#[stable(feature = "rust1", since = "1.0.0")]`
`694`	`694`	`#[inline]`
`695`	`695`	`pub fn is_control(self) -> bool {`
`696`		`- general_category::Cc(self)`
	`696`	`+ unicode::Cc(self)`
`697`	`697`	`}`
`698`	`698`
`699`	`699`	/// Returns `true` if this `char` has the `Grapheme_Extend` property.
`@@ -707,7 +707,7 @@ impl char {`
`707`	`707`	/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
`708`	`708`	`#[inline]`
`709`	`709`	`pub(crate) fn is_grapheme_extended(self) -> bool {`
`710`		`- derived_property::Grapheme_Extend(self)`
	`710`	`+ unicode::Grapheme_Extend(self)`
`711`	`711`	`}`
`712`	`712`
`713`	`713`	/// Returns `true` if this `char` has one of the general categories for numbers.
`@@ -739,7 +739,7 @@ impl char {`
`739`	`739`	`pub fn is_numeric(self) -> bool {`
`740`	`740`	`match self {`
`741`	`741`	`'0'..='9' => true,`
`742`		`- c => c > '\x7f' && general_category::N(c),`
	`742`	`+ c => c > '\x7f' && unicode::N(c),`
`743`	`743`	`}`
`744`	`744`	`}`
`745`	`745`