1
1
from pathlib import Path
2
2
import re
3
3
4
# Matches an OutputLine(...) call and captures two of its comma-separated
# arguments: group 1 is the second argument, group 2 is the fourth argument.
# NOTE: arguments are delimited purely by ',' here, so a comma inside an
# argument's text ends that capture early.
en_regex = re.compile(
    r'OutputLine\([^,]*,\s*([^,]*),\s*[^,]*,\s*([^,]*)'
)
5
5
6
6
def load_existing_list(path):
    """Return the full contents of the UTF-8 text file at ``path`` as one string.

    The file is opened with ``newline=''`` so that line endings come back
    exactly as stored instead of being translated by universal-newline mode.
    """
    with open(path, encoding='utf-8', newline='') as source:
        contents = source.read()
    return contents
9
9
10
10
11
# Path of the existing character list file this script works from.
existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_JP_and_OtherLang.txt')
# Output path: same directory and name, with '.out' appended after the existing suffix.
out_char_list = existing_char_list.with_suffix(existing_char_list.suffix + '.out')
# Root directory that is searched recursively for *.txt files.
source_directory = Path('C:/drojf/large_projects/umineko/HIGURASHI_REPOS')
14
14
@@ -17,15 +17,25 @@ def load_existing_list(path):
17
17
18
18
# Collect every distinct character that appears in the captured OutputLine(...)
# arguments of all *.txt files found recursively under source_directory.
all_chars = set()

# Toggles selecting which captured argument(s) contribute characters.
search_en = True
search_jp = True

for file in source_directory.rglob("*.txt"):
    print(file)
    with open(file, encoding='utf-8') as f:
        whole_file_string = f.read()

    # finditer() only yields successful matches, so no truthiness check is needed.
    for match in en_regex.finditer(whole_file_string):
        # en_regex has two capture groups: group 1 is the second OutputLine
        # argument and group 2 is the fourth. The English text must come from
        # group 2; reading group 1 for both would scan the same text twice
        # and never look at the fourth argument.
        outputline_jp_arg = match.group(1)
        outputline_english_arg = match.group(2)

        if search_en:
            all_chars.update(outputline_english_arg)

        if search_jp:
            all_chars.update(outputline_jp_arg)

# Sorted (by code point) list of every character found.
all_chars_list = sorted(all_chars)
@@ -55,11 +65,26 @@ def load_existing_list(path):
55
65
f .write (c )
56
66
57
67
# This is very bad for performance if there are lots of new chars found, but it works for now to maintain ordering
68
+ remove_list = []
58
69
for new_character in chars_to_add :
59
70
if new_character < c :
60
71
f .write (new_character )
61
- chars_to_add . remove (new_character )
72
+ remove_list . append (new_character )
62
73
print (f"Inserting new character { new_character } at position { i } as it is less than { c } " )
63
74
75
+ for item in remove_list :
76
+ chars_to_add .remove (item )
77
+
78
+ remove_list = []
79
+ for char in chars_to_add :
80
+ if char not in existing_font_set :
81
+ f .write (char )
82
+ else :
83
+ print (f"WARNING: character { char } already exists, skipping" )
84
+ remove_list .append (char )
85
+
86
+ for item in remove_list :
87
+ chars_to_add .remove (item )
88
+
64
89
if chars_to_add :
65
90
raise Exception (f"One or more characters were not added { chars_to_add } " )
0 commit comments