Spaces:
Sleeping
Sleeping
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from bs4 import BeautifulSoup
|
|
| 3 |
from abc import ABC, abstractmethod
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import List, Optional, Union
|
| 6 |
-
from langdetect import detect as get_language
|
| 7 |
from collections import namedtuple
|
| 8 |
from inspect import signature
|
| 9 |
import os
|
|
@@ -659,6 +658,7 @@ def all_pipes(pos,neg,artist,song):
|
|
| 659 |
|
| 660 |
return imgs
|
| 661 |
|
|
|
|
| 662 |
language_codes = {
|
| 663 |
"afrikaans": "af",
|
| 664 |
"albanian": "sq",
|
|
@@ -963,28 +963,121 @@ class BaseTranslator(ABC):
|
|
| 963 |
translated = self.translate(text, **kwargs)
|
| 964 |
arr.append(translated)
|
| 965 |
return arr
|
| 966 |
-
|
| 967 |
-
def translate(txt,to_lang="en",from_lang=False):
|
| 968 |
-
log(f'CALL translate')
|
| 969 |
-
if not from_lang:
|
| 970 |
-
from_lang = get_language(txt)
|
| 971 |
-
if(from_lang == to_lang):
|
| 972 |
-
log(f'RET translate with txt as {txt}')
|
| 973 |
-
return txt
|
| 974 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 975 |
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 979 |
translation = ""
|
| 980 |
-
if len(txt) >
|
| 981 |
words = txt.split()
|
| 982 |
while len(words) > 0:
|
| 983 |
chunk = ""
|
| 984 |
-
while len(words) > 0 and len(chunk) <
|
| 985 |
chunk = chunk + " " + words[0]
|
| 986 |
words = words[1:]
|
| 987 |
-
if len(chunk) >
|
| 988 |
_words = chunk.split()
|
| 989 |
words = [_words[-1], *words]
|
| 990 |
chunk = " ".join(_words[:-1])
|
|
|
|
| 3 |
from abc import ABC, abstractmethod
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import List, Optional, Union
|
|
|
|
| 6 |
from collections import namedtuple
|
| 7 |
from inspect import signature
|
| 8 |
import os
|
|
|
|
| 658 |
|
| 659 |
return imgs
|
| 660 |
|
| 661 |
+
google_translate_endpoint = "https://translate.google.com/m"
|
| 662 |
language_codes = {
|
| 663 |
"afrikaans": "af",
|
| 664 |
"albanian": "sq",
|
|
|
|
| 963 |
translated = self.translate(text, **kwargs)
|
| 964 |
arr.append(translated)
|
| 965 |
return arr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 966 |
|
| 967 |
+
class GoogleTranslator(BaseTranslator):
    """
    class that wraps functions, which use Google Translate under the hood to translate text(s)

    The text is sent as a query parameter to ``google_translate_endpoint`` with
    ``requests.get`` and the translation is read from the returned HTML.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping handed to ``requests.get``
        @param kwargs: forwarded unchanged to ``BaseTranslator.__init__``
        """
        self.proxies = proxies
        super().__init__(
            base_url=google_translate_endpoint,
            source=source,
            target=target,
            element_tag="div",
            element_query={"class": "t0"},
            payload_key="q",  # key of text in the url
            **kwargs
        )

        # Fallback selector, tried when nothing matches ``element_query``.
        self._alt_element_query = {"class": "result-container"}

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: accepted for interface compatibility; not used here
        @return: str: translated text
        @raise TooManyRequests: when the endpoint answers with HTTP 429
        @raise RequestError: when ``request_failed`` reports a failed status code
        @raise TranslationNotFound: when neither result element is in the page
        """
        if not is_input_valid(text, max_chars=1000):
            # NOTE(review): is_input_valid is defined elsewhere in the file. If it
            # reports bad input by returning a falsy value instead of raising,
            # callers get None here, exactly as before.
            return None

        text = text.strip()
        if self._same_source_target() or is_empty(text):
            return text

        self._url_params["tl"] = self._target
        self._url_params["sl"] = self._source
        if self.payload_key:
            self._url_params[self.payload_key] = text

        response = requests.get(
            self._base_url, params=self._url_params, proxies=self.proxies
        )
        try:
            if response.status_code == 429:
                raise TooManyRequests()
            if request_failed(status_code=response.status_code):
                raise RequestError()
            soup = BeautifulSoup(response.text, "html.parser")
        finally:
            # Close the response on the error paths too, not only on success.
            response.close()

        element = soup.find(self._element_tag, self._element_query)
        if not element:
            element = soup.find(self._element_tag, self._alt_element_query)
            if not element:
                raise TranslationNotFound(text)

        translated = element.get_text(strip=True)
        if translated != text:
            return translated

        # The page echoed the input back unchanged. Both strings are equal here,
        # so comparing their alphanumeric characters reduces to asking whether
        # the text contains any alphanumeric character at all.
        if any(ch.isalnum() for ch in text):
            self._url_params["tl"] = self._target
            if "hl" not in self._url_params:
                return text
            # Retry once without the "hl" parameter; the recursion stops because
            # the key is gone on the next pass.
            del self._url_params["hl"]
            return self.translate(text)

        # Input without alphanumeric characters (e.g. punctuation only): return
        # the echoed text rather than falling through and returning None.
        return translated

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
|
| 1068 |
+
|
| 1069 |
+
def translate(txt,to_lang="en",from_lang="auto"):
|
| 1070 |
+
log(f'CALL translate')
|
| 1071 |
+
translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
|
| 1072 |
translation = ""
|
| 1073 |
+
if len(txt) > 1000:
|
| 1074 |
words = txt.split()
|
| 1075 |
while len(words) > 0:
|
| 1076 |
chunk = ""
|
| 1077 |
+
while len(words) > 0 and len(chunk) < 1000:
|
| 1078 |
chunk = chunk + " " + words[0]
|
| 1079 |
words = words[1:]
|
| 1080 |
+
if len(chunk) > 1000:
|
| 1081 |
_words = chunk.split()
|
| 1082 |
words = [_words[-1], *words]
|
| 1083 |
chunk = " ".join(_words[:-1])
|