Commit aa84f63

Update unicode script to Python 3
1 parent f24cb8a · commit aa84f63

1 file changed: 20 additions and 20 deletions


scripts/unicode.py

Lines changed: 20 additions & 20 deletions
@@ -18,7 +18,7 @@
 # Since this should not require frequent updates, we just store this
 # out-of-line and check the unicode.rs file into git.
 import collections
-import requests
+import urllib.request
 
 UNICODE_VERSION = "9.0.0"
 UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -68,9 +68,9 @@ def __init__(self):
 
         def stats(name, table):
             count = sum(len(v) for v in table.values())
-            print "%s: %d chars => %d decomposed chars" % (name, len(table), count)
+            print("%s: %d chars => %d decomposed chars" % (name, len(table), count))
 
-        print "Decomposition table stats:"
+        print("Decomposition table stats:")
         stats("Canonical decomp", self.canon_decomp)
         stats("Compatible decomp", self.compat_decomp)
         stats("Canonical fully decomp", self.canon_fully_decomp)
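A quick aside on the change above: print is an ordinary function in Python 3, so the old statement form is a syntax error, while the %-style formatting carries over unchanged. A minimal sketch with hypothetical figures:

    # print is a function in Python 3; the %-formatting itself is unchanged.
    name, size, count = "Canonical decomp", 2060, 3062  # hypothetical numbers
    print("%s: %d chars => %d decomposed chars" % (name, size, count))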
@@ -79,8 +79,8 @@ def stats(name, table):
         self.ss_leading, self.ss_trailing = self._compute_stream_safe_tables()
 
     def _fetch(self, filename):
-        resp = requests.get(UCD_URL + filename)
-        return resp.text
+        resp = urllib.request.urlopen(UCD_URL + filename)
+        return resp.read().decode('utf-8')
 
     def _load_unicode_data(self):
         self.combining_classes = {}
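The _fetch change swaps the third-party requests dependency for the standard library. A minimal standalone sketch of the same pattern; the URL is only an example, the script builds its own from UCD_URL and a filename:

    import urllib.request

    def fetch(url):
        # urlopen() yields bytes; the UCD data files are UTF-8, so decode explicitly
        # (requests' .text used to do this decoding for us).
        with urllib.request.urlopen(url) as resp:
            return resp.read().decode('utf-8')

    # Example only: any UCD text file would do.
    text = fetch("https://www.unicode.org/Public/9.0.0/ucd/ReadMe.txt")
    print(text.splitlines()[0])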
@@ -234,7 +234,7 @@ def _decompose(char_int, compatible):
         # need to store their overlap when they agree. When they don't agree,
         # store the decomposition in the compatibility table since we'll check
         # that first when normalizing to NFKD.
-        assert canon_fully_decomp <= compat_fully_decomp
+        assert set(canon_fully_decomp) <= set(compat_fully_decomp)
 
         for ch in set(canon_fully_decomp) & set(compat_fully_decomp):
             if canon_fully_decomp[ch] == compat_fully_decomp[ch]:
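The assert had to change because Python 3 does not define ordering comparisons between dicts; comparing the key sets expresses the intended invariant directly. A small sketch with made-up entries:

    # Hypothetical decomposition tables keyed by code point.
    canon = {0x00C0: [0x0041, 0x0300]}
    compat = {0x00C0: [0x0041, 0x0300], 0x2460: [0x0031]}

    # Every canonically decomposable character must also appear in the
    # compatibility table; set() over a dict takes its keys.
    assert set(canon) <= set(compat)
    # `canon <= compat` raises TypeError on Python 3.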
@@ -284,15 +284,15 @@ def _compute_stream_safe_tables(self):
 
         return leading_nonstarters, trailing_nonstarters
 
-hexify = lambda c: hex(c)[2:].upper().rjust(4, '0')
+hexify = lambda c: '{:04X}'.format(c)
 
 def gen_combining_class(combining_classes, out):
     out.write("#[inline]\n")
     out.write("pub fn canonical_combining_class(c: char) -> u8 {\n")
     out.write(" match c {\n")
 
     for char, combining_class in sorted(combining_classes.items()):
-        out.write(" '\u{%s}' => %s,\n" % (hexify(char), combining_class))
+        out.write(" '\\u{%s}' => %s,\n" % (hexify(char), combining_class))
 
     out.write(" _ => 0,\n")
     out.write(" }\n")
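The new hexify is simply the idiomatic Python 3 spelling; both forms produce at least four uppercase hex digits. A quick sketch comparing them:

    old_hexify = lambda c: hex(c)[2:].upper().rjust(4, '0')
    new_hexify = lambda c: '{:04X}'.format(c)

    # Same result for small and large code points alike.
    for cp in (0x41, 0x300, 0x1F600):
        assert old_hexify(cp) == new_hexify(cp)
    print(new_hexify(0x300))   # 0300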
@@ -304,7 +304,7 @@ def gen_composition_table(canon_comp, out):
     out.write(" match (c1, c2) {\n")
 
     for (c1, c2), c3 in sorted(canon_comp.items()):
-        out.write(" ('\u{%s}', '\u{%s}') => Some('\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))
+        out.write(" ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))
 
     out.write(" _ => None,\n")
     out.write(" }\n")
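The doubled backslashes throughout these generator functions are the part that actually breaks on Python 3: in a string literal, \u must introduce a full \uXXXX escape, so the old '\u{%s}' templates are syntax errors. A sketch of the fix, plus an equivalent raw-string spelling, using an arbitrary combining-class entry:

    hexify = lambda c: '{:04X}'.format(c)

    # Escaped backslash: the generated Rust keeps a literal "\u{...}" escape.
    arm = " '\\u{%s}' => %s,\n" % (hexify(0x300), 230)
    print(arm, end="")        # prints:  '\u{0300}' => 230,

    # A raw string works too and avoids the doubling.
    arm_raw = r" '\u{%s}' => %s," % (hexify(0x300), 230) + "\n"
    assert arm == arm_raw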
@@ -323,8 +323,8 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, out):
         out.write(" Some(match c {\n")
 
         for char, chars in sorted(table.items()):
-            d = ", ".join("'\u{%s}'" % hexify(c) for c in chars)
-            out.write(" '\u{%s}' => &[%s],\n" % (hexify(char), d))
+            d = ", ".join("'\\u{%s}'" % hexify(c) for c in chars)
+            out.write(" '\\u{%s}' => &[%s],\n" % (hexify(char), d))
 
         out.write(" _ => return None,\n")
         out.write(" })\n")
@@ -375,8 +375,8 @@ def gen_combining_mark(general_category_mark, out):
     out.write("pub fn is_combining_mark(c: char) -> bool {\n")
     out.write(" match c {\n")
 
-    for char in general_category_mark:
-        out.write(" '\u{%s}' => true,\n" % hexify(char))
+    for char in sorted(general_category_mark):
+        out.write(" '\\u{%s}' => true,\n" % hexify(char))
 
     out.write(" _ => false,\n")
     out.write(" }\n")
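The added sorted() calls are not strictly a 2-to-3 fix: iteration order of sets and dicts is an implementation detail, so sorting the code points keeps the emitted match arms in a stable, ordered sequence and the generated tables reproducible. A tiny sketch:

    # Hypothetical sample of combining-mark code points.
    general_category_mark = {0x0591, 0x0300, 0x0301}

    for char in sorted(general_category_mark):
        print(" '\\u{%04X}' => true," % char)
    # Always emits 0300, 0301, 0591 in that order, independent of set internals.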
@@ -387,8 +387,8 @@ def gen_stream_safe(leading, trailing, out):
     out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
     out.write(" match c {\n")
 
-    for char, num_leading in leading.items():
-        out.write(" '\u{%s}' => %d,\n" % (hexify(char), num_leading))
+    for char, num_leading in sorted(leading.items()):
+        out.write(" '\\u{%s}' => %d,\n" % (hexify(char), num_leading))
 
     out.write(" _ => 0,\n")
     out.write(" }\n")
@@ -399,8 +399,8 @@ def gen_stream_safe(leading, trailing, out):
     out.write("pub fn stream_safe_trailing_nonstarters(c: char) -> usize {\n")
     out.write(" match c {\n")
 
-    for char, num_trailing in trailing.items():
-        out.write(" '\u{%s}' => %d,\n" % (hexify(char), num_trailing))
+    for char, num_trailing in sorted(trailing.items()):
+        out.write(" '\\u{%s}' => %d,\n" % (hexify(char), num_trailing))
 
     out.write(" _ => 0,\n")
     out.write(" }\n")
@@ -419,7 +419,7 @@ def gen_tests(tests, out):
     """)
 
     out.write("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n")
-    str_literal = lambda s: '"%s"' % "".join("\u{%s}" % c for c in s)
+    str_literal = lambda s: '"%s"' % "".join("\\u{%s}" % c for c in s)
 
     for test in tests:
        out.write(" NormalizationTest {\n")
@@ -434,7 +434,7 @@ def gen_tests(tests, out):
 
 if __name__ == '__main__':
     data = UnicodeData()
-    with open("tables.rs", "w") as out:
+    with open("tables.rs", "w", newline = "\n") as out:
         out.write(PREAMBLE)
         out.write("use quick_check::IsNormalized;\n")
         out.write("use quick_check::IsNormalized::*;\n")
@@ -470,6 +470,6 @@ def gen_tests(tests, out):
         gen_stream_safe(data.ss_leading, data.ss_trailing, out)
         out.write("\n")
 
-    with open("normalization_tests.rs", "w") as out:
+    with open("normalization_tests.rs", "w", newline = "\n") as out:
         out.write(PREAMBLE)
         gen_tests(data.norm_tests, out)
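Finally, the newline="\n" argument matters because Python 3 opens files in text mode with universal newlines: on Windows each "\n" written would be translated to "\r\n", so the generated Rust sources could pick up CRLF endings. Passing newline="\n" disables that translation. A small sketch with a throwaway file name:

    # Hypothetical output file, just to show the effect of newline="\n".
    with open("example_tables.rs", "w", newline="\n") as out:
        out.write("// auto-generated\n")
        out.write("pub const N: usize = 1;\n")

    with open("example_tables.rs", "rb") as f:
        data = f.read()
    assert b"\r\n" not in data   # LF endings on every platform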
