Skip to content

Commit 82e3780

Browse files
Merge pull request #40 from pescheckit/feature_added_fix-fuzzy
Feature added fix fuzzy
2 parents 0449df6 + 93cf370 commit 82e3780

File tree

7 files changed

+133
-20
lines changed

7 files changed

+133
-20
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ docker run -v /Users/username/translations:/input \
179179
```bash
180180
python -m pytest
181181
```
182+
```bash
183+
docker run --rm -v $(pwd):/app -w /app --entrypoint python python-gpt-po -m pytest -v
184+
```
182185

183186
## Documentation
184187

python_gpt_po/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ def main():
184184
model=model,
185185
bulk_mode=args.bulk,
186186
fuzzy=args.fuzzy,
187+
fix_fuzzy=args.fix_fuzzy,
187188
folder_language=args.folder_language
188189
)
189190

python_gpt_po/models/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ class TranslationConfig:
1616
model: str
1717
bulk_mode: bool = False
1818
fuzzy: bool = False
19+
fix_fuzzy: bool = False
1920
folder_language: bool = False

python_gpt_po/services/translation_service.py

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -336,36 +336,38 @@ def scan_and_process_po_files(
336336
logging.info("Discovered .po file: %s", po_file_path)
337337

338338
# Prepare the PO file, if it returns None then skip this file
339-
po_file = self._prepare_po_file(po_file_path, languages)
340-
if po_file is None:
339+
po_file_result = self._prepare_po_file(po_file_path, languages)
340+
if po_file_result is None:
341341
logging.info("Skipping file %s due to language mismatch or other issues", po_file_path)
342342
continue
343343

344-
# Process the file
345-
self.process_po_file(po_file_path, languages, detail_languages)
344+
# Process the file, passing the prepared po_file and file_lang
345+
self.process_po_file(po_file_path, languages, detail_languages, po_file_result)
346346

347347
def process_po_file(
348348
self,
349349
po_file_path: str,
350350
languages: List[str],
351-
detail_languages: Optional[Dict[str, str]] = None
351+
detail_languages: Optional[Dict[str, str]] = None,
352+
po_file_result=None,
352353
):
353354
"""Processes a single .po file with translations."""
354355
try:
355-
po_file = self._prepare_po_file(po_file_path, languages)
356-
if not po_file:
357-
return
356+
# Only prepare the po_file if not provided (for backward compatibility)
357+
if po_file_result is None:
358+
po_file_result = self._prepare_po_file(po_file_path, languages)
359+
if po_file_result is None:
360+
return
358361

359-
file_lang = self.po_file_handler.get_file_language(
360-
po_file_path,
361-
po_file,
362-
languages,
363-
self.config.folder_language
364-
)
362+
po_file, file_lang = po_file_result
365363

366364
# Get the detailed language name if available
367365
detail_lang = detail_languages.get(file_lang) if detail_languages else None
368366

367+
if self.config.fix_fuzzy:
368+
self.fix_fuzzy_entries(po_file, po_file_path, file_lang, detail_lang)
369+
return
370+
369371
texts_to_translate = [entry.msgid for entry in po_file if not entry.msgstr.strip() and entry.msgid]
370372
translations = self.get_translations(texts_to_translate, file_lang, po_file_path, detail_lang)
371373

@@ -384,6 +386,9 @@ def process_po_file(
384386
def _prepare_po_file(self, po_file_path: str, languages: List[str]):
385387
"""Prepares the .po file for translation."""
386388
if self.config.fuzzy:
389+
logging.warning(
390+
"Consider running with '--fix-fuzzy' to clean and update the fuzzy translations properly.",
391+
)
387392
self.po_file_handler.disable_fuzzy_translations(po_file_path)
388393
po_file = polib.pofile(po_file_path)
389394
file_lang = self.po_file_handler.get_file_language(
@@ -395,7 +400,7 @@ def _prepare_po_file(self, po_file_path: str, languages: List[str]):
395400
if not file_lang:
396401
logging.warning("Skipping .po file due to language mismatch: %s", po_file_path)
397402
return None
398-
return po_file
403+
return po_file, file_lang
399404

400405
def get_translations(
401406
self,
@@ -424,6 +429,22 @@ def _update_po_entries(
424429
else:
425430
self._handle_empty_translation(entry, target_language, detail_language)
426431

432+
def _update_fuzzy_po_entries(
    self,
    po_file,
    translations: List[str],
    entries_to_update: list
):
    """Apply fresh translations to fuzzy entries and clear their 'fuzzy' flag.

    Entries and translations are paired positionally. An entry is updated
    (via ``self.po_file_handler.update_po_entry``) and un-flagged only when
    its paired translation is a non-blank string; otherwise it is left
    untouched, still marked fuzzy, and a warning is logged.

    Args:
        po_file: The parsed PO file that owns the entries.
        translations: Translated texts, expected to be parallel to
            ``entries_to_update``.
        entries_to_update: The fuzzy entries to refresh.
    """
    expected = len(entries_to_update)
    received = len(translations)
    if received != expected:
        # zip() below stops at the shorter sequence, so without this message
        # the unpaired entries would silently remain fuzzy.
        logging.warning(
            "Received %d translations for %d fuzzy entries; unmatched entries are left fuzzy.",
            received,
            expected,
        )

    for entry, translation in zip(entries_to_update, translations):
        # A missing (None / non-string) result is treated like an empty one
        # instead of raising AttributeError and aborting the whole batch.
        if isinstance(translation, str) and translation.strip():
            # NOTE(review): the handler is given only the msgid; presumably it
            # re-locates the entry inside po_file - confirm it copes with
            # duplicate msgids that differ only by context.
            self.po_file_handler.update_po_entry(po_file, entry.msgid, translation)
            if 'fuzzy' in entry.flags:
                entry.flags.remove('fuzzy')
            logging.info("Fixed fuzzy entry '%s' -> '%s'", entry.msgid, translation)
        else:
            logging.warning("Translation for fuzzy '%s' is still empty, leaving fuzzy.", entry.msgid)
447+
427448
def _handle_empty_translation(self, entry, target_language: str, detail_language: Optional[str] = None):
428449
"""Handles cases where the initial translation is empty."""
429450
logging.warning("Empty translation for '%s'. Attempting individual translation.", entry.msgid)
@@ -453,3 +474,34 @@ def _handle_untranslated_entries(self, po_file, target_language: str, detail_lan
453474
)
454475
else:
455476
logging.error("Failed to translate '%s' after final attempt.", entry.msgid)
477+
478+
def fix_fuzzy_entries(
    self,
    po_file,
    po_file_path: str,
    target_language: str,
    detail_language: Optional[str] = None,
):
    """Retranslate every entry flagged 'fuzzy' in a PO file and save the result.

    Collects the fuzzy entries, requests translations for their msgids,
    hands both to ``_update_fuzzy_po_entries``, writes the file back to
    ``po_file_path`` and reports the outcome through the PO file handler.
    Returns early, without saving, when the file has no fuzzy entries.

    Args:
        po_file: Parsed PO file to scan and update in place.
        po_file_path: Path used for saving and for log messages.
        target_language: Language code passed to the translator.
        detail_language: Optional detailed language name passed to the translator.
    """
    pending = list(filter(lambda item: 'fuzzy' in item.flags, po_file))
    pending_count = len(pending)

    # Guard clause: nothing flagged, nothing to translate or save.
    if pending_count == 0:
        logging.info("No fuzzy entries found in %s", po_file_path)
        return

    logging.info("Found %d fuzzy entries to fix in %s", pending_count, po_file_path)

    source_texts = [item.msgid for item in pending]
    fresh_translations = self.get_translations(
        source_texts,
        target_language,
        po_file_path,
        detail_language,
    )
    self._update_fuzzy_po_entries(po_file, fresh_translations, entries_to_update=pending)

    po_file.save(po_file_path)

    # Report what each entry holds after the update pass, not what was requested.
    resulting_texts = [item.msgstr for item in pending]
    self.po_file_handler.log_translation_status(po_file_path, source_texts, resulting_texts)

    logging.info("Fuzzy fix completed for %s", po_file_path)

python_gpt_po/tests/unit/__init__.py

Whitespace-only changes.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import polib
4+
5+
from python_gpt_po.models.config import TranslationConfig
6+
from python_gpt_po.models.enums import ModelProvider
7+
from python_gpt_po.models.provider_clients import ProviderClients
8+
from python_gpt_po.services.translation_service import TranslationService
9+
from python_gpt_po.tests.test_multi_provider import SAMPLE_PO_CONTENT
10+
11+
12+
def test_fix_fuzzy_entries_on_sample_po_content(tmp_path):
    """fix_fuzzy_entries retranslates the fuzzy sample entry, unflags it and saves once."""
    # Put the shared sample catalogue on disk so polib parses a real file.
    sample_path = tmp_path / "sample.po"
    sample_path.write_text(SAMPLE_PO_CONTENT, encoding="utf-8")
    sample_path_str = str(sample_path)
    catalog = polib.pofile(sample_path_str)

    # Precondition: exactly one entry in the sample is flagged fuzzy.
    flagged = [item for item in catalog if 'fuzzy' in item.flags]
    assert len(flagged) == 1
    target = flagged[0]
    assert target.msgid == "This is a fuzzy translation"

    # Service configured for fix-fuzzy mode with placeholder provider clients.
    service = TranslationService(
        config=TranslationConfig(
            provider_clients=ProviderClients(),
            provider=ModelProvider.OPENAI,
            model="gpt-4o",
            bulk_mode=True,
            fuzzy=False,
            folder_language=False,
            fix_fuzzy=True,
        )
    )

    # Replace the translation call with a canned answer.
    expected_text = "Ceci est une traduction correcte"
    service.get_translations = MagicMock(return_value=[expected_text])

    # Intercept save so the test does not rewrite the file.
    with patch.object(catalog, 'save') as save_spy:
        service.fix_fuzzy_entries(catalog, sample_path_str, "fr")

    # The entry carries the new text and is no longer fuzzy.
    assert target.msgstr == expected_text
    assert 'fuzzy' not in target.flags

    # The catalogue was saved exactly once, to the original path.
    save_spy.assert_called_once_with(sample_path_str)

python_gpt_po/utils/cli.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def parse_args():
6868
provider_group = parser.add_argument_group('Provider Settings')
6969
api_group = parser.add_argument_group('API Keys')
7070
advanced_group = parser.add_argument_group('Advanced Options')
71+
fuzzy_group = advanced_group.add_mutually_exclusive_group()
7172

7273
# Required arguments (not required if listing models)
7374
required_group.add_argument(
@@ -135,11 +136,6 @@ def parse_args():
135136
)
136137

137138
# Advanced options
138-
advanced_group.add_argument(
139-
"--fuzzy",
140-
action="store_true",
141-
help="Process fuzzy translations (remove fuzzy markers)"
142-
)
143139
advanced_group.add_argument(
144140
"--bulk",
145141
action="store_true",
@@ -152,6 +148,16 @@ def parse_args():
152148
metavar="SIZE",
153149
help="Number of strings to translate in each batch (default: 50)"
154150
)
151+
fuzzy_group.add_argument(
152+
"--fuzzy",
153+
action="store_true",
154+
help="Remove fuzzy markers without translating (legacy behavior, risky)"
155+
)
156+
fuzzy_group.add_argument(
157+
"--fix-fuzzy",
158+
action="store_true",
159+
help="Translate and clean fuzzy entries safely (recommended)"
160+
)
155161

156162
# Version information
157163
parser.add_argument(

0 commit comments

Comments
 (0)