18
18
# Since this should not require frequent updates, we just store this
19
19
# out-of-line and check the unicode.rs file into git.
20
20
21
- import fileinput , re , os , sys
21
+ import fileinput , re , os , sys , collections
22
22
23
23
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
24
24
// file at the top-level directory of this distribution and at
@@ -160,19 +160,9 @@ def to_combines(combs):
160
160
return combs_out
161
161
162
162
def format_table_content(f, content, indent):
    """Write every entry of `content` to `f`, one entry per write.

    f       -- writable file-like object receiving the output.
    content -- iterable of already-formatted entry strings.
    indent  -- number of spaces placed in front of each entry.

    Each entry is followed by a comma and a newline.
    """
    padding = " " * indent
    f.writelines("%s%s,\n " % (padding, entry) for entry in content)
176
166
177
167
def load_properties (f , interestingprops ):
178
168
fetch (f )
@@ -220,14 +210,44 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
220
210
if is_pub :
221
211
pub_string = "pub "
222
212
f .write (" %sconst %s: %s = &[\n " % (pub_string , name , t_type ))
223
- data = ""
224
- first = True
225
- for dat in t_data :
226
- if not first :
227
- data += ","
228
- first = False
229
- data += pfun (dat )
230
- format_table_content (f , data , 8 )
213
+ format_table_content (f , [pfun (d ) for d in t_data ], 8 )
214
+ f .write ("\n ];\n \n " )
215
+
216
def emit_strtab_table(f, name, keys, vfun, is_pub=True,
        tab_entry_type='char', slice_element_sfun=escape_char):
    """Emit a lookup table plus a shared, de-duplicated element table.

    Two Rust constants are written to `f`:

    * `name`: one `(char, Slice)` pair per key, where the slice gives the
      offset and length of that key's value inside the element table.
    * `name`_STRTAB: the elements themselves, typed `tab_entry_type`.

    f                  -- writable file-like object receiving the output.
    name               -- identifier of the lookup table constant.
    keys               -- iterable of characters, emitted in the given order.
    vfun               -- callable mapping a key to its sequence of elements.
    is_pub             -- when true, both constants are prefixed with `pub`.
    tab_entry_type     -- Rust type of one element table entry.
    slice_element_sfun -- callable rendering one element as Rust source.

    Keys whose values are identical share a single slice, so each distinct
    value is stored only once.
    """
    pub_string = "pub " if is_pub else ""
    entry_indent = " " * 8

    # pub_string carries its own trailing space; no extra space before
    # `const`, matching the format used by emit_table.
    f.write(" %sconst %s: &'static [(char, Slice)] = &[\n "
            % (pub_string, name))

    # Maps each distinct value tuple to its (offset, length) slice. Insertion
    # order is the layout order of the element table, so it must be kept.
    strtab = collections.OrderedDict()
    strtab_offset = 0

    # TODO: a more sophisticated algorithm here would not only check for the
    # existence of v in the strtab, but also v in contiguous substrings of
    # strtab, if that's possible.
    for k in keys:
        v = tuple(vfun(k))
        item_slice = strtab.get(v)
        if item_slice is None:
            # First occurrence: append it at the current end of the table.
            item_slice = (strtab_offset, len(v))
            strtab[v] = item_slice
            strtab_offset += len(v)

        f.write("%s(%s, Slice { offset: %d, length: %d }),\n "
                % (entry_indent, escape_char(k), item_slice[0], item_slice[1]))

    f.write("\n ];\n \n ")

    f.write(" %sconst %s_STRTAB: &'static [%s] = &[\n "
            % (pub_string, name, tab_entry_type))

    # Iterating the dict yields the value tuples in insertion order; this
    # works on both Python 2 and Python 3, unlike iteritems().
    for v in strtab:
        if not v:
            # A zero-length value occupies no entries; writing it would
            # produce a line holding only a comma.
            continue
        f.write("%s%s,\n "
                % (entry_indent, ', '.join(slice_element_sfun(c) for c in v)))

    f.write("\n ];\n \n ")
232
252
233
253
def emit_norm_module (f , canon , compat , combine , norm_props , general_category_mark ):
@@ -251,43 +271,38 @@ def emit_norm_module(f, canon, compat, combine, norm_props, general_category_mar
251
271
canon_comp_keys .sort ()
252
272
253
273
f .write ("pub mod normalization {\n " )
274
+ f .write ("""
275
+ pub struct Slice {
276
+ pub offset: u16,
277
+ pub length: u16,
278
+ }
279
+ """ )
254
280
255
281
def mkdata_fun(table):
    """Build a one-argument lookup function bound to `table`.

    The returned callable takes a character and returns `table[char]`.
    """
    def lookup(char):
        return table[char]

    return lookup
267
285
286
+ # TODO: should the strtab of these two tables be of type &'static str, for
287
+ # smaller data?
268
288
f .write (" // Canonical decompositions\n " )
269
- emit_table (f , "canonical_table" , canon_keys , "&'static [(char, &'static [char])]" ,
270
- pfun = mkdata_fun (canon ))
289
+ emit_strtab_table (f , "canonical_table" , canon_keys ,
290
+ vfun = mkdata_fun (canon ))
271
291
272
292
f .write (" // Compatibility decompositions\n " )
273
- emit_table (f , "compatibility_table" , compat_keys , "&'static [(char, &'static [char])]" ,
274
- pfun = mkdata_fun (compat ))
275
-
276
- def comp_pfun (char ):
277
- data = "(%s,&[" % escape_char (char )
278
- canon_comp [char ].sort (lambda x , y : x [0 ] - y [0 ])
279
- first = True
280
- for pair in canon_comp [char ]:
281
- if not first :
282
- data += ","
283
- first = False
284
- data += "(%s,%s)" % (escape_char (pair [0 ]), escape_char (pair [1 ]))
285
- data += "])"
286
- return data
293
+ emit_strtab_table (f , "compatibility_table" , compat_keys ,
294
+ vfun = mkdata_fun (compat ))
295
+
296
def comp_vfun(char):
    """Return the entries of canon_comp[char] ordered by their first item.

    A key function replaces the positional comparison function, which
    Python 3's sorted() no longer accepts; the resulting order is the same
    because both forms compare only the first item and the sort is stable.
    """
    return sorted(canon_comp[char], key=lambda pair: pair[0])
287
298
288
299
f .write (" // Canonical compositions\n " )
289
- emit_table (f , "composition_table" , canon_comp_keys ,
290
- "&'static [(char, &'static [(char, char)])]" , pfun = comp_pfun )
300
+ # "&'static [(char, &'static [(char, char)])]", pfun=comp_pfun)
301
+ emit_strtab_table (f , "composition_table" , canon_comp_keys ,
302
+ vfun = comp_vfun ,
303
+ tab_entry_type = "(char, char)" ,
304
+ slice_element_sfun = lambda pair : "(%s,%s)" % (escape_char (pair [0 ]),
305
+ escape_char (pair [1 ])))
291
306
292
307
f .write ("""
293
308
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
0 commit comments