1
+ from pathlib import Path
2
+ import re
3
+
4
def load_existing_list(path):
    """Return the entire contents of the file at ``path`` as one string.

    The file is decoded as UTF-8 and opened with ``newline=''``, so line
    endings are returned exactly as stored instead of being translated.
    """
    with open(path, encoding='utf-8', newline='') as source:
        contents = source.read()
    return contents
7
+
8
# Merge the characters of the Japanese list into the multi-language list and
# write the combined list to a new file. The multi-language list keeps its
# existing order; each new character is placed in front of the first existing
# character that compares greater than it, and whatever is left is appended.
japanese_list = '../msgothic_0_charset_Japanese.txt'
multilang_list = '../msgothic_2_charset_OtherLang.txt'
out_char_list = '../msgothic_2_charset_JP_and_OtherLang.txt'

jp = load_existing_list(japanese_list)
multi = load_existing_list(multilang_list)

chars_to_add = set(jp)
existing_chars = set(multi)

# Characters the multi-language list already contains must never be written a
# second time. Report them (in a stable order) and drop them before merging,
# so the insertion loop below cannot emit duplicates.
for char in sorted(chars_to_add & existing_chars):
    print(f"WARNING: character { char } already exists, skipping")
chars_to_add -= existing_chars

# Sort once so the output is reproducible (set iteration order is not) and so
# each step only has to look at the smallest character still pending.
new_chars = sorted(chars_to_add)
next_new = 0

with open(out_char_list, 'w', encoding='utf-8', newline='') as f:
    for i, c in enumerate(multi):
        # Emit every pending character smaller than c *before* c itself, so
        # the new characters end up ahead of the character they sort below.
        while next_new < len(new_chars) and new_chars[next_new] < c:
            new_character = new_chars[next_new]
            f.write(new_character)
            chars_to_add.remove(new_character)
            print(f"Inserting new character { new_character } at position { i } as it is less than { c } ")
            next_new += 1

        f.write(c)

    # Anything still pending is not smaller than any existing character that
    # followed it in the scan, so it goes at the end of the file.
    for char in new_chars[next_new:]:
        f.write(char)
        chars_to_add.remove(char)


# Sanity check: every character taken from the Japanese list must have been
# either written or skipped as a duplicate by now.
if chars_to_add:
    raise Exception(f"One or more characters were not added { chars_to_add } ")
0 commit comments