-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtranslation.py
139 lines (112 loc) · 4.73 KB
/
translation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import logging
import boto3
import requests
from bs4 import BeautifulSoup
from django.conf import settings
from django.utils.module_loading import import_string
logger = logging.getLogger(__name__)
# Language codes taken from settings.LANGUAGES, e.g. ['en', 'es', 'fr', ...]
AVAILABLE_LANGUAGES = [code for code, _label in settings.LANGUAGES]
class BaseTranslator:
    """Shared base for the translator helpers; supplies the test-only fake translation."""

    def _fake_translation(self, text, dest_language, source_language):
        """
        Return ``text`` followed by a marker naming both languages.

        No translation service is involved. This is only used for test.
        """
        marker = f' translated to "{dest_language}" using source language "{source_language}"'
        return text + marker
class DummyTranslator(BaseTranslator):
    """Translator that never contacts a service and always answers with the fake translation."""

    def translate_text(self, text, dest_language, source_language="auto"):
        """Return the fake translation of ``text`` built by ``_fake_translation``."""
        fake_result = self._fake_translation(text, dest_language, source_language)
        return fake_result
class AmazonTranslator(BaseTranslator):
    """
    Amazon Translator helper

    Wraps a boto3 "translate" client. When ``settings.TESTING`` is set,
    ``translate_text`` returns the fake translation and no service is called.
    """

    def __init__(self, client=None):
        """
        Prepare the client used by ``translate_text``.

        Args:
            client: optional ready-made client object. When given it is used
                as-is and the AWS settings are not consulted.

        Raises:
            Exception: if no client is given, ``settings.TESTING`` is false and
                any of the AWS access key, secret key or region settings is empty.
        """
        if client is not None:
            # An injected client does not need the AWS settings, so a missing
            # configuration must not reject it (and it is kept under TESTING too).
            self._translator = client
            return
        if settings.TESTING:
            # NOTE: Service not used for testing
            return
        if not settings.AWS_TRANSLATE_ACCESS_KEY or not settings.AWS_TRANSLATE_SECRET_KEY or not settings.AWS_TRANSLATE_REGION:
            raise Exception("Translation configuration missing")
        self._translator = boto3.client(
            "translate",
            aws_access_key_id=settings.AWS_TRANSLATE_ACCESS_KEY,
            aws_secret_access_key=settings.AWS_TRANSLATE_SECRET_KEY,
            region_name=settings.AWS_TRANSLATE_REGION,
        )

    def translate_text(self, text, dest_language, source_language="auto"):
        """
        Translate ``text`` into ``dest_language`` and return the translated string.

        Args:
            text: the text to translate.
            dest_language: target language code passed as ``TargetLanguageCode``.
            source_language: source language code passed as ``SourceLanguageCode``.

        Returns:
            The ``"TranslatedText"`` entry of the client response, or the fake
            translation when ``settings.TESTING`` is set.
        """
        # NOTE: using 'auto' as source_language will cost extra. Language Detection: https://aws.amazon.com/comprehend/pricing/
        if settings.TESTING:
            # NOTE: Mocking for test purpose
            return self._fake_translation(text, dest_language, source_language)
        response = self._translator.translate_text(
            Text=text,
            SourceLanguageCode=source_language,
            TargetLanguageCode=dest_language,
        )
        return response["TranslatedText"]
class IfrcTranslator(BaseTranslator):
    """
    IFRC Translator helper

    Posts text to ``<domain>/api/translate`` with an ``X-API-KEY`` header.
    Text longer than ``settings.AZURE_TRANSL_LIMIT`` is cut; only the leading
    part is sent and the remainder is appended to the result untranslated.
    """

    domain: str
    url: str
    # Request headers sent with every translation call (holds the API key).
    headers: dict

    def __init__(self):
        """
        Read the endpoint configuration from the Django settings.

        Raises:
            Exception: if the translation domain or the API key setting is empty.
        """
        if not settings.IFRC_TRANSLATION_DOMAIN or not settings.IFRC_TRANSLATION_HEADER_API_KEY:
            raise Exception("Translation configuration missing")
        self.domain = settings.IFRC_TRANSLATION_DOMAIN.strip("/")
        self.url = f"{self.domain}/api/translate"
        self.headers = {
            "X-API-KEY": settings.IFRC_TRANSLATION_HEADER_API_KEY,
        }

    @classmethod
    def is_text_html(cls, text):
        """Return True when BeautifulSoup's ``html.parser`` finds at least one tag in ``text``."""
        return bool(BeautifulSoup(text, "html.parser").find())

    @classmethod
    def _find_end_of_last_tag(cls, text, limit, tag):
        """Return the index just past the last ``tag`` found in ``text[:limit]``, or -1 if absent."""
        position = text[:limit].rfind(tag)
        if position == -1:
            return -1
        return position + len(tag)

    @classmethod
    def find_last_slashtable(cls, text, limit):
        """Return the index just past the last ``</table>`` in ``text[:limit]``, or -1 if absent."""
        return cls._find_end_of_last_tag(text, limit, "</table>")

    @classmethod
    def find_last_slashp(cls, text, limit):
        """Return the index just past the last ``</p>`` in ``text[:limit]``, or -1 if absent."""
        return cls._find_end_of_last_tag(text, limit, "</p>")

    @classmethod
    def _split_oversized(cls, text, limit):
        """
        Split ``text`` into ``(head, tail)`` so that ``head`` is at most ``limit`` characters.

        The cut is placed after the last ``</table>`` within the limit, else
        after the last ``</p>``, else exactly at ``limit``. Text that already
        fits is returned with an empty tail.
        """
        if len(text) <= limit:
            return text, ""
        for find_cut in (cls.find_last_slashtable, cls.find_last_slashp):
            cut = find_cut(text, limit)
            if cut != -1:
                break
        else:
            # No closing tag to cut at: fall back to a hard cut at the limit.
            cut = limit
        return text[:cut], text[cut:]

    def translate_text(self, text, dest_language, source_language=None):
        """
        Translate ``text`` into ``dest_language`` through the IFRC endpoint.

        Args:
            text: plain or HTML text to translate.
            dest_language: target language code sent as ``"to"``.
            source_language: source language code sent as ``"from"``.

        Returns:
            The translated text, followed by any part of the input that was cut
            off for exceeding the size limit (that part is left untranslated).
            ``None`` when the endpoint answers with HTTP 500. Under
            ``settings.TESTING`` the fake translation is returned instead.
        """
        if settings.TESTING:
            # NOTE: Mocking for test purpose
            return self._fake_translation(text, dest_language, source_language)

        # A dirty workaround to handle oversized HTML+CSS texts, usually tables:
        text, untranslated_tail = self._split_oversized(text, settings.AZURE_TRANSL_LIMIT)

        payload = {
            "text": text,
            "from": source_language,
            "to": dest_language,
        }
        if self.is_text_html(text):
            # NOTE: Sending 'text' throws 500 from IFRC translation endpoint
            # So only sending if html
            payload["textType"] = "html"

        response = requests.post(
            self.url,
            headers=self.headers,
            json=payload,
        )

        # Not using == 200 – it would break tests with MagicMock name=requests.post() results
        if response.status_code != 500:
            return response.json()[0]["translations"][0]["text"] + untranslated_tail

        # Keep the best-effort contract (no exception), but make the failure visible.
        logger.error(
            "IFRC translation endpoint returned HTTP 500 (dest_language=%s, source_language=%s)",
            dest_language,
            source_language,
        )
        return None
def get_translator_class():
    """Import and return the object named by ``settings.AUTO_TRANSLATION_TRANSLATOR``."""
    configured_path = settings.AUTO_TRANSLATION_TRANSLATOR
    translator_class = import_string(configured_path)
    return translator_class