Skip to content

Commit f3c8d89

Browse files
committed
Add new JP characters to character lists
1 parent 3e7f7aa commit f3c8d89

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py

Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
11
from pathlib import Path
22
import re
33

4-
en_regex = re.compile(r'OutputLine\([^,]*,\s*[^,]*,\s*[^,]*,\s*([^,]*)')
4+
en_regex = re.compile(r'OutputLine\([^,]*,\s*([^,]*),\s*[^,]*,\s*([^,]*)')
55

66
def load_existing_list(path):
77
with open(path, encoding='utf-8', newline='') as f:
88
return f.read()
99

1010

11-
existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt')
11+
existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_JP_and_OtherLang.txt')
1212
out_char_list = existing_char_list.with_suffix(existing_char_list.suffix + '.out')
1313
source_directory = Path('C:/drojf/large_projects/umineko/HIGURASHI_REPOS')
1414

@@ -17,15 +17,25 @@ def load_existing_list(path):
1717

1818
all_chars = set()
1919

20+
search_en = True
21+
search_jp = True
22+
2023
for file in source_directory.rglob("*.txt"):
2124
print(file)
2225
with open(file, encoding='utf-8') as f:
2326
whole_file_string = f.read()
2427
for match in en_regex.finditer(whole_file_string):
2528
if match:
29+
outputline_jp_arg = match.group(1)
2630
outputline_english_arg = match.group(2)  # English text is the 4th OutputLine argument (capture group 2); group 1 is the JP argument
27-
for c in outputline_english_arg:
28-
all_chars.add(c)
31+
32+
if search_en:
33+
for c in outputline_english_arg:
34+
all_chars.add(c)
35+
36+
if search_jp:
37+
for c in outputline_jp_arg:
38+
all_chars.add(c)
2939

3040
all_chars_list = list(all_chars)
3141
all_chars_list.sort()
@@ -55,11 +65,26 @@ def load_existing_list(path):
5565
f.write(c)
5666

5767
# This is very bad for performance if there are lots of new chars found, but it works for now to maintain ordering
68+
remove_list = []
5869
for new_character in chars_to_add:
5970
if new_character < c:
6071
f.write(new_character)
61-
chars_to_add.remove(new_character)
72+
remove_list.append(new_character)
6273
print(f"Inserting new character {new_character} at position {i} as it is less than {c}")
6374

75+
for item in remove_list:
76+
chars_to_add.remove(item)
77+
78+
remove_list = []
79+
for char in chars_to_add:
80+
if char not in existing_font_set:
81+
f.write(char)
82+
else:
83+
print(f"WARNING: character {char} already exists, skipping")
84+
remove_list.append(char)
85+
86+
for item in remove_list:
87+
chars_to_add.remove(item)
88+
6489
if chars_to_add:
6590
raise Exception(f"One or more characters were not added {chars_to_add}")

0 commit comments

Comments
 (0)