@@ -76,6 +76,66 @@ def get_codepoints(f):
76
76
for c in range (prev_codepoint + 1 , NUM_CODEPOINTS ):
77
77
yield Codepoint (c , None )
78
78
79
def compress_singletons(singletons):
    """Split each value into a high byte and a low byte, run-length grouping the high bytes.

    Returns a pair ``(uppers, lowers)``:

    * ``uppers`` is a list of ``(high_byte, count)`` tuples, one per run of
      consecutive input values that share the same ``value >> 8``;
    * ``lowers`` is the flat list of ``value & 0xff`` for every input value,
      in input order, so each run in ``uppers`` owns the next ``count``
      entries of ``lowers``.
    """
    uppers = []  # (high byte, number of entries it owns in lowers)
    lowers = []

    for value in singletons:
        high = value >> 8
        if uppers and uppers[-1][0] == high:
            # Same high byte as the run in progress: extend that run by one.
            run_high, run_count = uppers[-1]
            uppers[-1] = (run_high, run_count + 1)
        else:
            # First value overall, or the high byte changed: open a new run.
            uppers.append((high, 1))
        lowers.append(value & 0xff)

    return uppers, lowers
94
+
95
def compress_normal(normal):
    """Encode ``(start, count)`` ranges as alternating variable-width run lengths.

    ``normal`` is an iterable of ``(start, count)`` pairs that must be sorted
    by ``start`` and must not overlap. For each pair two lengths are emitted:

    * ``truelen``  -- the gap between the end of the previous range (0 for
      the first pair) and ``start``;
    * ``falselen`` -- ``count``, the length of the range itself.

    Length encoding:

    * lengths 0x00..0x7f are encoded as one byte: 00, 01, ..., 7e, 7f
    * lengths 0x80..0x7fff are encoded as two bytes with the top bit of the
      first byte set: 80 80, 80 81, ..., ff fe, ff ff

    Returns a list with one entry per input pair; each entry is the list of
    bytes ``[truelen, (truelenaux), falselen, (falselenaux)]``.

    Raises ``AssertionError`` if any length falls outside 0..0x7fff, which
    includes the negative gap produced by unsorted or overlapping ranges.
    """

    def encode_length(length):
        # A negative length would otherwise be masked into a plausible-looking
        # byte and silently corrupt the table, so reject it up front.
        assert 0 <= length < 0x8000, "length out of range: {}".format(length)
        if length > 0x7f:
            return [0x80 | (length >> 8), length & 0xff]
        return [length]

    compressed = []
    prev_end = 0
    for start, count in normal:
        truelen = start - prev_end
        falselen = count
        prev_end = start + count
        compressed.append(encode_length(truelen) + encode_length(falselen))

    return compressed
122
+
123
def print_singletons(uppers, lowers, uppersname, lowersname):
    """Print two Rust constant tables to standard output.

    The first, named ``uppersname``, is a ``[(u8, u8)]`` slice with one
    ``(value, count)`` row per pair in ``uppers``. The second, named
    ``lowersname``, is a ``[u8]`` slice holding ``lowers``, eight values per
    printed row.
    """
    print("const {}: &'static [(u8, u8)] = &[".format(uppersname))
    for high, count in uppers:
        print(" ({:#04x}, {}),".format(high, count))
    print("];")

    print("const {}: &'static [u8] = &[".format(lowersname))
    offset = 0
    while offset < len(lowers):
        # Emit at most eight bytes per row to keep the generated lines short.
        cells = ["{:#04x},".format(byte) for byte in lowers[offset:offset + 8]]
        print(" {}".format(" ".join(cells)))
        offset += 8
    print("];")
132
+
133
def print_normal(normal, normalname):
    """Print a Rust ``[u8]`` constant named ``normalname`` to standard output.

    ``normal`` is an iterable of byte sequences; each sequence is printed on
    its own row as comma-terminated, zero-padded hexadecimal literals.
    """
    print("const {}: &'static [u8] = &[".format(normalname))
    for entry in normal:
        cells = ["{:#04x},".format(byte) for byte in entry]
        print(" {}".format(" ".join(cells)))
    print("];")
138
+
79
139
def main ():
80
140
file = get_file ("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt" )
81
141
@@ -111,6 +171,11 @@ def main():
111
171
else :
112
172
normal0 .append ((a , b - a ))
113
173
174
+ singletons0u , singletons0l = compress_singletons (singletons0 )
175
+ singletons1u , singletons1l = compress_singletons (singletons1 )
176
+ normal0 = compress_normal (normal0 )
177
+ normal1 = compress_normal (normal1 )
178
+
114
179
print ("""\
115
180
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
116
181
// file at the top-level directory of this distribution and at
@@ -125,38 +190,49 @@ def main():
125
190
// NOTE: The following code was generated by "src/etc/char_private.py",
126
191
// do not edit directly!
127
192
128
- use slice::SliceExt;
129
-
130
- fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
131
- for &s in singletons {
132
- if x == s {
133
- return false;
134
- } else if x < s {
193
+ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8],
194
+ normal: &[u8]) -> bool {
195
+ let xupper = (x >> 8) as u8;
196
+ let mut lowerstart = 0;
197
+ for &(upper, lowercount) in singletonuppers {
198
+ let lowerend = lowerstart + lowercount as usize;
199
+ if xupper == upper {
200
+ for &lower in &singletonlowers[lowerstart..lowerend] {
201
+ if lower == x as u8 {
202
+ return false;
203
+ }
204
+ }
205
+ } else if xupper < upper {
135
206
break;
136
207
}
208
+ lowerstart = lowerend;
137
209
}
138
- for w in normal.chunks(2) {
139
- let start = w[0];
140
- let len = w[1];
141
- let difference = (x as i32) - (start as i32);
142
- if 0 <= difference {
143
- if difference < len as i32 {
144
- return false;
145
- }
210
+
211
+ let mut x = x as i32;
212
+ let mut normal = normal.iter().cloned();
213
+ let mut current = true;
214
+ while let Some(v) = normal.next() {
215
+ let len = if v & 0x80 != 0 {
216
+ ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
146
217
} else {
218
+ v as i32
219
+ };
220
+ x -= len;
221
+ if x < 0 {
147
222
break;
148
223
}
224
+ current = !current;
149
225
}
150
- true
226
+ current
151
227
}
152
228
153
229
pub fn is_printable(x: char) -> bool {
154
230
let x = x as u32;
155
231
let lower = x as u16;
156
232
if x < 0x10000 {
157
- check(lower, SINGLETONS0 , NORMAL0)
233
+ check(lower, SINGLETONS0U, SINGLETONS0L , NORMAL0)
158
234
} else if x < 0x20000 {
159
- check(lower, SINGLETONS1 , NORMAL1)
235
+ check(lower, SINGLETONS1U, SINGLETONS1L , NORMAL1)
160
236
} else {\
161
237
""" )
162
238
for a , b in extra :
@@ -169,22 +245,10 @@ def main():
169
245
}\
170
246
""" )
171
247
print ()
172
- print ("const SINGLETONS0: &'static [u16] = &[" )
173
- for s in singletons0 :
174
- print (" 0x{:x}," .format (s ))
175
- print ("];" )
176
- print ("const SINGLETONS1: &'static [u16] = &[" )
177
- for s in singletons1 :
178
- print (" 0x{:x}," .format (s ))
179
- print ("];" )
180
- print ("const NORMAL0: &'static [u16] = &[" )
181
- for a , b in normal0 :
182
- print (" 0x{:x}, 0x{:x}," .format (a , b ))
183
- print ("];" )
184
- print ("const NORMAL1: &'static [u16] = &[" )
185
- for a , b in normal1 :
186
- print (" 0x{:x}, 0x{:x}," .format (a , b ))
187
- print ("];" )
248
+ print_singletons (singletons0u , singletons0l , 'SINGLETONS0U' , 'SINGLETONS0L' )
249
+ print_singletons (singletons1u , singletons1l , 'SINGLETONS1U' , 'SINGLETONS1L' )
250
+ print_normal (normal0 , 'NORMAL0' )
251
+ print_normal (normal1 , 'NORMAL1' )
188
252
189
253
if __name__ == '__main__' :
190
254
main ()
0 commit comments