bazarr/libs/deep_translator/google.py

123 lines
3.8 KiB
Python

"""
google translator API
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from typing import List, Optional
import requests
from bs4 import BeautifulSoup
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS
from deep_translator.exceptions import (
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed
class GoogleTranslator(BaseTranslator):
    """
    Translator that scrapes the Google Translate web page to translate text(s).

    The translated text is read from the HTML response: first from the element
    matching ``element_tag``/``element_query`` handed to the base class, then
    from the alternative ``result-container`` element as a fallback.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping, passed unchanged to
            ``requests.get`` on every request
        @param kwargs: additional arguments forwarded to ``BaseTranslator``
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("GOOGLE_TRANSLATE"),
            source=source,
            target=target,
            element_tag="div",
            element_query={"class": "t0"},
            payload_key="q",  # key of text in the url
            **kwargs
        )
        # Fallback query used when the primary element is missing from the page.
        self._alt_element_query = {"class": "result-container"}

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: additional args; not used for the request itself, only
            forwarded when the translation is retried
        @return: str: translated text
        @raise TooManyRequests: the server answered with HTTP status 429
        @raise RequestError: ``request_failed`` reported a failed status code
        @raise TranslationNotFound: no result element was found in the response
        """
        if not is_input_valid(text, max_chars=5000):
            # NOTE(review): the original fell through and returned None here;
            # presumably is_input_valid raises on bad input instead of
            # returning False - confirm against deep_translator.validate.
            return None

        text = text.strip()
        if self._same_source_target() or is_empty(text):
            return text

        self._url_params["tl"] = self._target
        self._url_params["sl"] = self._source
        if self.payload_key:
            self._url_params[self.payload_key] = text

        # The context manager releases the connection on every exit path,
        # including the two error raises below (the original only closed the
        # response after a successful parse).
        with requests.get(
            self._base_url, params=self._url_params, proxies=self.proxies
        ) as response:
            if response.status_code == 429:
                raise TooManyRequests()
            if request_failed(status_code=response.status_code):
                raise RequestError()
            soup = BeautifulSoup(response.text, "html.parser")

        element = soup.find(self._element_tag, self._element_query)
        if not element:
            element = soup.find(self._element_tag, self._alt_element_query)
        if not element:
            raise TranslationNotFound(text)

        translated = element.get_text(strip=True)
        if translated != text:
            return translated

        # The page echoed the input unchanged. Both strings are equal here, so
        # their alphanumeric projections are equal too; the only open question
        # is whether the text contains any alphanumeric character at all.
        if any(ch.isalnum() for ch in text):
            self._url_params["tl"] = self._target
            if "hl" not in self._url_params:
                return text
            # Retry once without the "hl" parameter; the recursion ends because
            # the key is gone on the next pass.
            del self._url_params["hl"]
            return self.translate(text, **kwargs)

        # Punctuation/symbol-only input that came back unchanged: return it
        # instead of silently falling through and returning None.
        return translated

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args, forwarded to ``_translate_file``
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to ``_translate_batch``
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)