1
+ from pathlib import Path
2
+ import re
3
+
4
# One argument of an OutputLine(...) call: preferably a complete double-quoted
# string literal (which may itself contain commas and backslash escapes such
# as \"), otherwise any run of non-comma characters (e.g. NULL).
_OUTPUTLINE_ARG = r'(?:"(?:[^"\\]|\\.)*"|[^,]*)'

# Captures the fourth argument of an OutputLine(...) call as group 1.
# Quoted arguments are matched as whole string literals, so a comma inside the
# text neither truncates the capture nor shifts which argument is captured.
# The surrounding double quotes are part of group 1. Unquoted arguments still
# use the plain "everything up to the next comma" rule.
en_regex = re.compile(
    r'OutputLine\(\s*'
    + (_OUTPUTLINE_ARG + r'\s*,\s*') * 3
    + r'("(?:[^"\\]|\\.)*"|[^,]*)'
)
5
+
6
def load_existing_list(path):
    """Return the complete text of the UTF-8 encoded file at ``path``.

    The file is opened with ``newline=''``, so line endings are returned
    exactly as they are stored in the file instead of being normalised.
    """
    with open(path, mode='r', encoding='utf-8', newline='') as char_list_file:
        contents = char_list_file.read()
    return contents
9
+
10
+
11
# Existing character list file. The merged result is written next to it under
# the same file name with '.out' appended.
existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt')
out_char_list = existing_char_list.with_name(existing_char_list.name + '.out')

# Directory tree that is searched recursively for '*.txt' files.
source_directory = Path('C:/drojf/large_projects/umineko/HIGURASHI_REPOS')

# Keep both the raw text (original order is needed when writing the output)
# and a set of its characters (fast membership tests).
existing_char_list_text = load_existing_list(existing_char_list)
existing_font_set = {*existing_char_list_text}
17
+
18
# Every distinct character found in capture group 1 of en_regex, across all
# '*.txt' files below source_directory.
all_chars = set()

for script_path in source_directory.rglob("*.txt"):
    print(script_path)
    script_text = script_path.read_text(encoding='utf-8')
    # finditer() only yields successful matches, so each one is used directly.
    for hit in en_regex.finditer(script_text):
        all_chars.update(hit.group(1))
29
+
30
# Collected characters in ascending order, so the report below is ordered.
all_chars_list = sorted(all_chars)

# Collected characters that are missing from the existing character list.
chars_to_add = [char for char in all_chars_list if char not in existing_font_set]
new_char_found = bool(chars_to_add)

if new_char_found:
    for char in chars_to_add:
        print(f'NEW CHAR: { char } ')
else:
    print("No new characters found!")

# Show the combined character set (existing plus collected) as one sorted line.
final_list = sorted(existing_font_set | all_chars)
print(''.join(final_list))
51
+
52
+
53
# Write the existing character list back out in its original order, splicing
# each new character in immediately before the first existing character that
# sorts after it.
#
# The merge consumes chars_to_add from the front, so it must be in ascending
# order. Sorting here keeps this step correct on its own.
chars_to_add.sort()

with open(out_char_list, 'w', encoding='utf-8', newline='') as f:
    for i, c in enumerate(existing_char_list_text):
        # Drain from the front instead of calling remove() while iterating the
        # same list: removing during iteration skips the element that follows
        # each removed one, which misplaced characters or left them unwritten.
        while chars_to_add and chars_to_add[0] < c:
            new_character = chars_to_add.pop(0)
            # Written before c so the output stays in ascending order.
            f.write(new_character)
            print(f"Inserting new character { new_character } at position { i } as it is less than { c } ")

        f.write(c)

# Anything still pending is not smaller than any existing character, so no
# insertion point was found for it. Fail loudly, not silently dropping it.
if chars_to_add:
    raise Exception(f"One or more characters were not added { chars_to_add } ")
0 commit comments