Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions dialect/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class ProviderFeature(Flag):
""" If it supports sending translation suggestions to the service """


class ProvideLangModel(Enum):
class ProviderLangModel(Enum):
STATIC = auto()
"""
The provider populates its `src_languages` and `dest_languages` properties.
Expand All @@ -55,6 +55,13 @@ class ProvideLangModel(Enum):
"""


class ProviderLangComparison(Enum):
PLAIN = auto()
"""Perform a simple language code comparison: a == b"""
DEEP = auto()
"""Ignore ISO 3166-1 (country) and ISO 15924 (script) subtags and compare only the base language codes"""


@dataclass
class TranslationRequest:
text: str
Expand Down Expand Up @@ -92,8 +99,10 @@ class BaseProvider:
""" Provider capabilities, translation, tts, etc """
features: ProviderFeature = ProviderFeature.NONE
""" Provider features """
lang_model: ProvideLangModel = ProvideLangModel.STATIC
lang_model: ProviderLangModel = ProviderLangModel.STATIC
""" Translation language model """
lang_comp: ProviderLangComparison = ProviderLangComparison.PLAIN
""" Define behavior of default `cmp_langs` method """

defaults: ProviderDefaults = {
"instance_url": "",
Expand Down Expand Up @@ -222,10 +231,10 @@ def cmp_langs(self, a: str, b: str) -> bool:
"""
Compare two language codes.

It assumes that the codes have been normalized by ``BaseProvider.normalize_lang_code``
so providers might need to use ``BaseProvider.denormalize_lang`` on ``a`` and ``b``.
It assumes that the codes have been normalized by ``BaseProvider.normalize_lang_code``.

This method exists so providers can add additional comparison logic.
Default behavior depends on `self.lang_comp` value.

Args:
a: First lang to compare.
Expand All @@ -235,7 +244,23 @@ def cmp_langs(self, a: str, b: str) -> bool:
Whether both languages are considered equal or not.
"""

return a == b
# Early return if both langs are just the same
if a == b:
return True

# Plain comparison
if self.lang_comp == ProviderLangComparison.PLAIN:
return a == b

# Split lang code to separate possible country/script code
a_codes = a.split("-")
b_codes = b.split("-")

if a_codes[0] == b_codes[0]: # Check base codes
return True

return False


def dest_langs_for(self, code: str) -> list[str]:
"""
Expand Down
6 changes: 4 additions & 2 deletions dialect/providers/modules/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def translate_url(self):
return self.format_url("www.bing.com", "/ttranslatev3", params)

async def init_trans(self):
response = await self.get(self.html_url, self._headers, check_common=False, json=False)
response = await self.get(self.html_url, self._headers, check_common=False, return_json=False)

if response:
try:
Expand Down Expand Up @@ -125,11 +125,13 @@ async def translate(self, request):
detected=detected,
pronunciation=TranslationPronunciation(None, pronunciation),
)
else:
raise UnexpectedError("Unexpected translation response")

except Exception as exc:
raise UnexpectedError from exc

def check_known_errors(self, _status, data):
def check_known_errors(self, status, data):
if not data:
raise UnexpectedError("Response is empty!")

Expand Down
25 changes: 6 additions & 19 deletions dialect/providers/modules/deepl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright 2024 Rafael Mardojai CM
# SPDX-License-Identifier: GPL-3.0-or-later

from dialect.providers.base import ProviderCapability, ProviderFeature, Translation
from dialect.providers.base import ProviderCapability, ProviderFeature, ProviderLangComparison, Translation
from dialect.providers.errors import APIKeyInvalid, APIKeyRequired, ServiceLimitReached, UnexpectedError
from dialect.providers.soup import SoupProvider

Expand All @@ -20,6 +20,7 @@ class Provider(SoupProvider):
| ProviderFeature.API_KEY_REQUIRED
| ProviderFeature.API_KEY_USAGE
)
lang_comp = ProviderLangComparison.DEEP

defaults = {
"instance_url": "",
Expand Down Expand Up @@ -64,8 +65,8 @@ def headers(self):

async def init_trans(self):
# Get languages
src_langs = await self.get(self.source_lang_url, headers=self.headers)
dest_langs = await self.get(self.target_lang_url, headers=self.headers)
src_langs = await self.get(self.source_lang_url, self.headers)
dest_langs = await self.get(self.target_lang_url, self.headers)

if src_langs and dest_langs and isinstance(src_langs, list) and isinstance(dest_langs, list):
for lang in src_langs:
Expand All @@ -79,7 +80,7 @@ async def validate_api_key(self, key):
headers = {"Authorization": f"DeepL-Auth-Key {key}"}

try:
await self.get(url, headers=headers)
await self.get(url, headers)
return True
except (APIKeyInvalid, APIKeyRequired):
return False
Expand Down Expand Up @@ -110,7 +111,7 @@ async def translate(self, request):
raise UnexpectedError

async def api_char_usage(self):
response = await self.get(self.usage_url, headers=self.headers)
response = await self.get(self.usage_url, self.headers)

try:
usage = response.get("character_count")
Expand All @@ -121,20 +122,6 @@ async def api_char_usage(self):
except Exception as exc:
raise UnexpectedError from exc

def cmp_langs(self, a, b):
# Early return if both langs are just the same
if a == b:
return True

# Split lang code to separate it from possible country/script code
a_codes = a.split("-")
b_codes = b.split("-")

if a_codes[0] == b_codes[0]: # Check base codes
return True

return False

def check_known_errors(self, status, data):
message = data.get("message", "") if isinstance(data, dict) else ""

Expand Down
8 changes: 1 addition & 7 deletions dialect/providers/modules/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ async def init_trans(self):
langs_url = self.format_url(
self._get_translate_host(".com"), "/translate_a/l", {"client": "t", "alpha": "true"}
)
response = await self.get(langs_url, self._headers, check_common=False)
response = await self.get(langs_url, self._headers, False)

try:
for code, name in response["tl"].items():
Expand Down Expand Up @@ -447,9 +447,3 @@ def __init__(self, text: str, candidates: list[str]):

def __str__(self):
return self.text

def __dict__(self):
return {
"text": self.text,
"candidates": self.candidates,
}
2 changes: 1 addition & 1 deletion dialect/providers/modules/libretrans.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ async def suggest(self, text, src, dest, suggestion):
except: # noqa
return False

def check_known_errors(self, _status, data):
def check_known_errors(self, status, data):
if not data:
raise UnexpectedError("Response is empty!")

Expand Down
2 changes: 1 addition & 1 deletion dialect/providers/modules/lingva.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ async def speech(self, text, language):
file.close()
raise UnexpectedError from exc

def check_known_errors(self, _status, data):
def check_known_errors(self, status, data):
"""Raises a proper Exception if an error is found in the data."""
if not data:
raise UnexpectedError("Response is empty!")
Expand Down
139 changes: 31 additions & 108 deletions dialect/providers/modules/yandex.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Copyright 2023 Rafael Mardojai CM
# SPDX-License-Identifier: GPL-3.0-or-later

import json
import re
from uuid import uuid4

from dialect.providers.base import (
ProviderCapability,
ProviderFeature,
ProviderLangComparison,
Translation,
)
from dialect.providers.errors import ProviderError, UnexpectedError
Expand All @@ -18,6 +21,7 @@ class Provider(SoupProvider):

capabilities = ProviderCapability.TRANSLATION
features = ProviderFeature.DETECTION
lang_comp = ProviderLangComparison.DEEP

defaults = {
"instance_url": "",
Expand All @@ -37,116 +41,35 @@ def __init__(self, **kwargs):

self._uuid = str(uuid4()).replace("-", "")

async def init_trans(self):
languages = [
"af",
"sq",
"am",
"ar",
"hy",
"az",
"ba",
"eu",
"be",
"bn",
"bs",
"bg",
"my",
"ca",
"ceb",
"zh",
"cv",
"hr",
"cs",
"da",
"nl",
"sjn",
"emj",
"en",
"eo",
"et",
"fi",
"fr",
"gl",
"ka",
"de",
"el",
"gu",
"ht",
"he",
"mrj",
"hi",
"hu",
"is",
"id",
"ga",
"it",
"ja",
"jv",
"kn",
"kk",
"kazlat",
"km",
"ko",
"ky",
"lo",
"la",
"lv",
"lt",
"lb",
"mk",
"mg",
"ms",
"ml",
"mt",
"mi",
"mr",
"mhr",
"mn",
"ne",
"no",
"pap",
"fa",
"pl",
"pt",
"pa",
"ro",
"ru",
"gd",
"sr",
"si",
"sk",
"sl",
"es",
"su",
"sw",
"sv",
"tl",
"tg",
"ta",
"tt",
"te",
"th",
"tr",
"udm",
"uk",
"ur",
"uz",
"uzbcyr",
"vi",
"cy",
"xh",
"sah",
"yi",
"zu",
]
for code in languages:
self.add_lang(code)

@property
def translate_url(self):
path = f"/api/v1/tr.json/translate?id={self._uuid}-0-0&srv=android"
return self.format_url("translate.yandex.net", path)
params = {"id": self._uuid + "-0-0", "srv": "android"}
return self.format_url("translate.yandex.net", "/api/v1/tr.json/translate", params)

async def init_trans(self):
    """
    Populate the provider languages by parsing the Yandex Translate web page.

    The page HTML is fetched and the JSON values that follow the
    ``TRANSLATOR_LANGS:`` and ``DIALECTS:`` markers are extracted from it.
    Every language is registered through ``self.add_lang``; codes listed as
    dialects are registered with ``trans_src=False`` because dialects
    aren't valid source translation languages.

    Raises:
        UnexpectedError: If the page could not be fetched, or if its
            contents could not be decoded or parsed.
    """
    # The HTML is scraped because `/api/v1/tr.json/getLangs` doesn't provide
    # all the languages that Yandex supports
    page_url = self.format_url("translate.yandex.com")
    body = await self.get(page_url, check_common=False, return_json=False)

    # Nothing to parse without a response body
    if not body:
        raise UnexpectedError("Could not get HTML from yandex.com")

    try:
        # Response comes as bytes
        html = body.decode("utf-8")

        # Mapping of language code -> language name
        raw_langs = re.findall(r"TRANSLATOR_LANGS: (.*?),\n", html)[0]  # noqa
        lang_names: dict[str, str] = json.loads(raw_langs)

        # Codes that are dialects, so not usable as translation source
        raw_dialects = re.findall(r"DIALECTS: (.*?),\n", html)[0]  # noqa
        dialect_codes: list[str] = json.loads(raw_dialects)

        # Populate languages lists
        for code, name in lang_names.items():
            is_dialect = code in dialect_codes
            self.add_lang(code, name, trans_src=not is_dialect)

    except Exception as exc:
        raise UnexpectedError("Failed parsing HTML from yandex.com") from exc

async def translate(self, request):
src, dest = self.denormalize_lang(request.src, request.dest)
Expand Down
Loading