Add script for merging multiple text-edits together

drojf · drojf · commit a91dac2a3c8c · 2023-07-14T20:10:39.000+10:00
diff --git a/scripts/Manually Run Scripts/MergeMultipleTextEdits.py b/scripts/Manually Run Scripts/MergeMultipleTextEdits.py
@@ -0,0 +1,100 @@
+# Use this script to merge together multiple text-edits.json files together
+# It expects all .json files to be in a folder called 'text-edits' next to this script
+# It outputs to a file called merged-translations.json
+# Files will be output in alphabetical order, so it is recommended to prepend each file with the chapter number
+# If there are any conflicts, and exception will be raised.
+#
+# This script is usually never needed, unless translators have been swapping out the text-edits.json for each chapter
+# rather than maintaining a common text-edits.json for all chapters.
+
+import os
+import json
+
+
+class Fragment:
+    order = 0
+
+    def __init__(self, fragment_as_dictionary: dict[str, str]):
+        self.current_english = fragment_as_dictionary['CurrentEnglish']
+        self.current_japanese = fragment_as_dictionary['CurrentJapanese']
+        self.new_english = fragment_as_dictionary['NewEnglish']
+        self.new_japanese = fragment_as_dictionary['NewJapanese']
+        self.discriminator = fragment_as_dictionary.get('Discriminator')
+        self.order = Fragment.order
+        Fragment.order += 1
+
+        # Generate a key for this Fragment, used later to check for conflicting fragments
+        self.key = self.current_english + self.current_japanese
+        if self.discriminator is not None:
+            self.key += str(self.discriminator)
+
+
+    def equals(self, other: 'Fragment'):
+        return (
+            self.current_english == other.current_english and
+            self.current_japanese == other.current_japanese and
+            self.new_english == other.new_english and
+            self.new_japanese == other.new_japanese and
+            self.discriminator == other.discriminator
+        )
+
+    def __repr__(self) -> str:
+        return f"{self.order} ce: {self.current_english} cj: {self.current_japanese} ne: {self.new_english} nj: {self.new_japanese} d: {self.discriminator}"
+
+    def as_dict(self):
+        retval = {
+            'CurrentEnglish': self.current_english,
+            'CurrentJapanese': self.current_japanese,
+            'NewEnglish': self.new_english,
+            'NewJapanese': self.new_japanese,
+        }
+
+        if self.discriminator is not None:
+            retval['Discriminator'] = self.discriminator
+
+        return retval
+
+def merge(all_translations: dict[str, Fragment], fragment: Fragment):
+
+    if not fragment.key in all_translations:
+        all_translations[fragment.key] = fragment
+    else:
+        existing_item = all_translations[fragment.key]
+
+        if existing_item.equals(fragment):
+            print(f"Skipping duplicate item {fragment}")
+        else:
+            raise Exception(f"Warning: non duplicate item existing:{existing_item} new: {fragment}")
+
+
+
+in_folder = "text-edits"
+
+files = os.listdir(in_folder)
+
+all_fragments = [] # type: list[Fragment]
+
+for filename in files:
+    path = os.path.join(in_folder, filename)
+    print(f"Parsing {path}")
+
+    with open(path, encoding='utf-8') as f:
+        chapter_list_dict = json.loads(f.read())
+
+        all_fragments.extend(Fragment(f) for f in chapter_list_dict)
+
+all_translations = {}
+
+# Merge all fragments into one dict, ignoring duplicates
+for f in all_fragments:
+    print(f.current_english)
+    merge(all_translations, f)
+    print()
+
+# Convert to list and sort by 'order' which is the order the fragments were loaded
+sorted_translations = list(sorted(all_translations.values(), key=lambda f: f.order))
+for item in sorted_translations:
+    print(item)
+
+with open("merged-translations.json", 'w', encoding='utf-8') as out:
+    out.write(json.dumps([f.as_dict() for f in sorted_translations], indent=4, ensure_ascii=False))