18
18
# Since this should not require frequent updates, we just store this
19
19
# out-of-line and check the unicode.rs file into git.
20
20
import collections
21
- import requests
21
+ import urllib . request
22
22
23
23
UNICODE_VERSION = "9.0.0"
24
24
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -68,9 +68,9 @@ def __init__(self):
68
68
69
69
def stats(name, table):
    # Report how many source characters the table covers and how many
    # decomposed characters they expand to in total.
    total = 0
    for decomposition in table.values():
        total += len(decomposition)
    print("%s: %d chars => %d decomposed chars" % (name, len(table), total))
72
72
73
- print "Decomposition table stats:"
73
+ print ( "Decomposition table stats:" )
74
74
stats ("Canonical decomp" , self .canon_decomp )
75
75
stats ("Compatible decomp" , self .compat_decomp )
76
76
stats ("Canonical fully decomp" , self .canon_fully_decomp )
@@ -79,8 +79,8 @@ def stats(name, table):
79
79
self .ss_leading , self .ss_trailing = self ._compute_stream_safe_tables ()
80
80
81
81
def _fetch(self, filename):
    """Download *filename* from the Unicode Character Database mirror.

    Returns the file contents decoded as UTF-8 text.
    """
    # Use a `with` block so the HTTP response (and its socket) is closed
    # deterministically instead of leaking until garbage collection.
    with urllib.request.urlopen(UCD_URL + filename) as resp:
        return resp.read().decode('utf-8')
84
84
85
85
def _load_unicode_data (self ):
86
86
self .combining_classes = {}
@@ -234,7 +234,7 @@ def _decompose(char_int, compatible):
234
234
# need to store their overlap when they agree. When they don't agree,
235
235
# store the decomposition in the compatibility table since we'll check
236
236
# that first when normalizing to NFKD.
237
- assert canon_fully_decomp <= compat_fully_decomp
237
+ assert set ( canon_fully_decomp ) <= set ( compat_fully_decomp )
238
238
239
239
for ch in set (canon_fully_decomp ) & set (compat_fully_decomp ):
240
240
if canon_fully_decomp [ch ] == compat_fully_decomp [ch ]:
@@ -284,15 +284,15 @@ def _compute_stream_safe_tables(self):
284
284
285
285
return leading_nonstarters , trailing_nonstarters
286
286
287
def hexify(c):
    """Format codepoint *c* as uppercase hex, zero-padded to at least 4 digits.

    e.g. 0x41 -> '0041', 0x1D400 -> '1D400' (matches Rust `\\u{...}` escapes).
    """
    # PEP 8 (E731): use `def` rather than assigning a lambda to a name.
    return '{:04X}'.format(c)
288
288
289
289
def gen_combining_class (combining_classes , out ):
290
290
out .write ("#[inline]\n " )
291
291
out .write ("pub fn canonical_combining_class(c: char) -> u8 {\n " )
292
292
out .write (" match c {\n " )
293
293
294
294
for char , combining_class in sorted (combining_classes .items ()):
295
- out .write (" '\u{%s}' => %s,\n " % (hexify (char ), combining_class ))
295
+ out .write (" '\\ u{%s}' => %s,\n " % (hexify (char ), combining_class ))
296
296
297
297
out .write (" _ => 0,\n " )
298
298
out .write (" }\n " )
@@ -304,7 +304,7 @@ def gen_composition_table(canon_comp, out):
304
304
out .write (" match (c1, c2) {\n " )
305
305
306
306
for (c1 , c2 ), c3 in sorted (canon_comp .items ()):
307
- out .write (" ('\u{%s}', '\u{%s}') => Some('\u{%s}'),\n " % (hexify (c1 ), hexify (c2 ), hexify (c3 )))
307
+ out .write (" ('\\ u{%s}', '\\ u{%s}') => Some('\ \ u{%s}'),\n " % (hexify (c1 ), hexify (c2 ), hexify (c3 )))
308
308
309
309
out .write (" _ => None,\n " )
310
310
out .write (" }\n " )
@@ -323,8 +323,8 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, out):
323
323
out .write (" Some(match c {\n " )
324
324
325
325
for char , chars in sorted (table .items ()):
326
- d = ", " .join ("'\u{%s}'" % hexify (c ) for c in chars )
327
- out .write (" '\u{%s}' => &[%s],\n " % (hexify (char ), d ))
326
+ d = ", " .join ("'\\ u{%s}'" % hexify (c ) for c in chars )
327
+ out .write (" '\\ u{%s}' => &[%s],\n " % (hexify (char ), d ))
328
328
329
329
out .write (" _ => return None,\n " )
330
330
out .write (" })\n " )
@@ -375,8 +375,8 @@ def gen_combining_mark(general_category_mark, out):
375
375
out .write ("pub fn is_combining_mark(c: char) -> bool {\n " )
376
376
out .write (" match c {\n " )
377
377
378
- for char in general_category_mark :
379
- out .write (" '\u{%s}' => true,\n " % hexify (char ))
378
+ for char in sorted ( general_category_mark ) :
379
+ out .write (" '\\ u{%s}' => true,\n " % hexify (char ))
380
380
381
381
out .write (" _ => false,\n " )
382
382
out .write (" }\n " )
@@ -387,8 +387,8 @@ def gen_stream_safe(leading, trailing, out):
387
387
out .write ("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n " )
388
388
out .write (" match c {\n " )
389
389
390
- for char , num_leading in leading .items ():
391
- out .write (" '\u{%s}' => %d,\n " % (hexify (char ), num_leading ))
390
+ for char , num_leading in sorted ( leading .items () ):
391
+ out .write (" '\\ u{%s}' => %d,\n " % (hexify (char ), num_leading ))
392
392
393
393
out .write (" _ => 0,\n " )
394
394
out .write (" }\n " )
@@ -399,8 +399,8 @@ def gen_stream_safe(leading, trailing, out):
399
399
out .write ("pub fn stream_safe_trailing_nonstarters(c: char) -> usize {\n " )
400
400
out .write (" match c {\n " )
401
401
402
- for char , num_trailing in trailing .items ():
403
- out .write (" '\u{%s}' => %d,\n " % (hexify (char ), num_trailing ))
402
+ for char , num_trailing in sorted ( trailing .items () ):
403
+ out .write (" '\\ u{%s}' => %d,\n " % (hexify (char ), num_trailing ))
404
404
405
405
out .write (" _ => 0,\n " )
406
406
out .write (" }\n " )
@@ -419,7 +419,7 @@ def gen_tests(tests, out):
419
419
""" )
420
420
421
421
out .write ("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n " )
422
- str_literal = lambda s : '"%s"' % "" .join ("\u{%s}" % c for c in s )
422
+ str_literal = lambda s : '"%s"' % "" .join ("\\ u{%s}" % c for c in s )
423
423
424
424
for test in tests :
425
425
out .write (" NormalizationTest {\n " )
@@ -434,7 +434,7 @@ def gen_tests(tests, out):
434
434
435
435
if __name__ == '__main__' :
436
436
data = UnicodeData ()
437
- with open ("tables.rs" , "w" ) as out :
437
+ with open ("tables.rs" , "w" , newline = " \n " ) as out :
438
438
out .write (PREAMBLE )
439
439
out .write ("use quick_check::IsNormalized;\n " )
440
440
out .write ("use quick_check::IsNormalized::*;\n " )
@@ -470,6 +470,6 @@ def gen_tests(tests, out):
470
470
gen_stream_safe (data .ss_leading , data .ss_trailing , out )
471
471
out .write ("\n " )
472
472
473
- with open ("normalization_tests.rs" , "w" ) as out :
473
+ with open ("normalization_tests.rs" , "w" , newline = " \n " ) as out :
474
474
out .write (PREAMBLE )
475
475
gen_tests (data .norm_tests , out )
0 commit comments