2024-02-20 00:46:14 +00:00
|
|
|
import logging
|
|
|
|
import time
|
|
|
|
from http.cookies import SimpleCookie
|
|
|
|
from random import randint
|
|
|
|
|
|
|
|
import pycountry
|
|
|
|
from requests.cookies import RequestsCookieJar
|
2024-08-06 11:15:50 +00:00
|
|
|
from subliminal.exceptions import AuthenticationError, ProviderError
|
2024-02-20 00:46:14 +00:00
|
|
|
from subliminal.providers import ParserBeautifulSoup
|
|
|
|
from subliminal_patch.http import RetryingCFSession
|
|
|
|
from subliminal_patch.pitcher import store_verification
|
|
|
|
from subliminal_patch.providers import Provider
|
|
|
|
from subliminal_patch.subtitle import Subtitle
|
|
|
|
from subzero.language import Language
|
|
|
|
from .utils import get_archive_from_bytes, get_subtitle_from_archive, FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Language names that this module tries to resolve through ``lookup_lang``.
# The entries are grouped by initial letter purely for readability; the order
# and the exact spelling of every name are significant and must not change.
supported_languages_names = [
    # A
    "Abkhazian", "Afar", "Afrikaans", "Akan", "Albanian", "Amharic",
    "Arabic", "Aragonese", "Armenian", "Assamese", "Avaric", "Avestan",
    "Aymara", "Azerbaijani",
    # B
    "Bambara", "Bashkir", "Basque", "Belarusian", "Bengali",
    "Bihari languages", "Bislama", "Bokmål, Norwegian", "Bosnian",
    "Brazilian Portuguese", "Breton", "Bulgarian", "Burmese",
    # C
    "Cantonese", "Catalan", "Central Khmer", "Chamorro", "Chechen",
    "Chichewa", "Chinese", "Church Slavic", "Chuvash", "Cornish",
    "Corsican", "Cree", "Croatian", "Czech",
    # D
    "Danish", "Dhivehi", "Dutch", "Dzongkha",
    # E
    "English", "Esperanto", "Estonian", "Ewe",
    # F
    "Faroese", "Fijian", "Filipino", "Finnish", "French", "Fulah",
    # G
    "Gaelic", "Galician", "Ganda", "Georgian", "German", "Greek",
    "Guarani", "Gujarati",
    # H
    "Haitian", "Hausa", "Hebrew", "Herero", "Hindi", "Hiri Motu",
    "Hungarian",
    # I
    "Icelandic", "Ido", "Igbo", "Indonesian", "Interlingua", "Interlingue",
    "Inuktitut", "Inupiaq", "Irish", "Italian",
    # J
    "Japanese", "Javanese",
    # K
    "Kalaallisut", "Kannada", "Kanuri", "Kashmiri", "Kazakh", "Kikuyu",
    "Kinyarwanda", "Kirghiz", "Komi", "Kongo", "Korean", "Kuanyama",
    "Kurdish",
    # L
    "Lao", "Latin", "Latvian", "Limburgan", "Lingala", "Lithuanian",
    "Luba-Katanga", "Luxembourgish",
    # M
    "Macedonian", "Malagasy", "Malay", "Malayalam", "Maltese", "Mandarin",
    "Manx", "Maori", "Marathi", "Marshallese", "Mongolian", "Moore",
    # N
    "Nauru", "Navajo", "Ndebele, North", "Ndebele, South", "Ndonga",
    "Nepali", "Northern Sami", "Norwegian", "Norwegian Nynorsk",
    # O
    "Occitan (post 1500)", "Ojibwa", "Oriya", "Oromo", "Ossetian",
    # P
    "Pali", "Panjabi", "Persian", "Polish", "Portuguese", "Pushto",
    # Q
    "Quechua",
    # R
    "Romanian", "Romansh", "Rundi", "Russian",
    # S
    "Samoan", "Sango", "Sanskrit", "Sardinian", "Serbian", "Shona",
    "Sichuan Yi", "Sindhi", "Sinhala", "Slovak", "Slovenian", "Somali",
    "Sotho, Southern", "Spanish", "Sundanese", "Swahili", "Swati",
    "Swedish",
    # T
    "Tagalog", "Tahitian", "Tajik", "Tamil", "Tatar", "Telugu", "Thai",
    "Tibetan", "Tigrinya", "Tongan", "Tsonga", "Tswana", "Turkish",
    "Turkmen", "Twi",
    # U
    "Uighur", "Ukrainian", "Urdu", "Uzbek",
    # V
    "Venda", "Vietnamese", "Volapük",
    # W
    "Walloon", "Welsh", "Western Frisian", "Wolof",
    # X, Y, Z
    "Xhosa", "Yiddish", "Yoruba", "Zhuang", "Zulu"
]
|
|
|
|
|
|
|
|
|
|
|
|
class AvistazNetworkSubtitle(Subtitle):
    """AvistaZ.to Subtitle."""

    # Class-level default; every instance overwrites it in ``__init__``.
    provider_name = None

    def __init__(
        self,
        provider_name,
        page_link,
        download_link,
        language,
        video,
        filename,
        release,
        uploader,
    ):
        """Store the data scraped for one subtitle entry of a release page.

        :param provider_name: name of the provider that produced this subtitle.
        :param page_link: URL of the release page the subtitle was found on.
        :param download_link: URL the subtitle file is fetched from.
        :param language: language of the subtitle.
        :param video: video object the subtitle was listed for.
        :param filename: file name of the subtitle; also used as its ``id``.
        :param release: release name, stored as ``release_info``.
        :param uploader: name of the uploader, or ``None``.
        """
        super().__init__(language, page_link=page_link)

        # Identification of the entry.
        self.provider_name = provider_name
        self.hearing_impaired = None
        self.language = language
        self.filename = filename
        self.release_info = release

        # Where it was found and where to get it from.
        self.page_link = page_link
        self.download_link = download_link
        self.video = video

        # Filled in later: matches by ``get_matches``, content on download.
        self.matches = None
        self.content = None
        self.uploader = uploader
        self.encoding = None

    @property
    def id(self):
        """Return the subtitle file name, which serves as the identifier."""
        return self.filename

    def get_matches(self, video):
        """Record and return the match set for this subtitle.

        Subtitles are downloaded directly from the release page, so the
        result is always treated as a perfect (``hash``) match; ``video`` is
        not inspected.
        """
        perfect_match = {'hash'}
        self.matches = perfect_match
        return self.matches
|
|
|
|
|
|
|
|
|
|
|
|
def lookup_lang(name):
    """Resolve a human-readable language name to a ``Language``.

    The name is looked up with ``pycountry.languages.lookup`` and the
    resulting alpha-3 code is passed to ``Language``.

    :param name: language name to resolve.
    :return: the matching ``Language``, or ``None`` when the name cannot be
        resolved or the resulting code is rejected by ``Language``.
    """
    try:
        return Language(pycountry.languages.lookup(name).alpha_3)
    except Exception:
        # Deliberately best-effort: any lookup/conversion failure means
        # "unsupported". ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        return None
|
|
|
|
|
|
|
|
|
|
|
|
class AvistazNetworkProviderBase(Provider):
    """AvistaZ Network base provider.

    Lists the subtitles shown on a release page of the site at
    ``server_url`` and downloads them using a cookie-authenticated session.
    ``server_url`` and ``provider_name`` are ``None`` here; presumably
    concrete subclasses set them (not visible in this file - confirm).
    """

    subtitle_class = AvistazNetworkSubtitle

    # Every supported name that ``lookup_lang`` could resolve, plus a
    # ``hi=True`` rebuild of each of those languages.
    languages = set(filter(None, map(lookup_lang, supported_languages_names)))
    languages.update(set(Language.rebuild(L, hi=True) for L in languages))

    # NOTE(review): 'rules' is appended directly to server_url in
    # ``initialize``, so the value is assumed to end with '/' - confirm in
    # the subclasses.
    server_url = None
    provider_name = None
    hash_verifiable = True

    def __init__(self, cookies, user_agent=None):
        """Remember the credentials; no network access happens here.

        :param cookies: cookie header string, parsed with ``SimpleCookie``
            in ``initialize``.
        :param user_agent: optional User-Agent; when falsy a random entry of
            ``AGENT_LIST`` is used instead.
        """
        self.session = None
        self.cookies = cookies
        self.user_agent = user_agent

    def initialize(self):
        """Create the HTTP session and verify that the cookies are accepted.

        :return: ``True`` when the cookie check passed.
        :raises AuthenticationError: when the ``rules`` page answers with
            302, 403 or 404.
        """
        self.session = RetryingCFSession()

        if self.user_agent:
            self.session.headers['User-Agent'] = self.user_agent
        else:
            self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

        if self.cookies:
            self.session.cookies = RequestsCookieJar()
            simple_cookie = SimpleCookie()
            simple_cookie.load(self.cookies)

            for k, v in simple_cookie.items():
                self.session.cookies.set(k, v.value)

        # Redirects are not followed so that a 302 stays visible as a
        # status code and can be reported as an authentication failure.
        rr = self.session.get(self.server_url + 'rules', allow_redirects=False, timeout=10,
                              headers={"Referer": self.server_url})
        if rr.status_code in [302, 404, 403]:
            logger.info('Cookies expired')
            raise AuthenticationError("cookies not valid anymore")

        store_verification(self.provider_name, self.session)
        logger.debug('Cookies valid')
        time.sleep(2)
        return True

    def terminate(self):
        """Close the HTTP session, if one was ever created."""
        # ``session`` is still None when ``initialize`` was never called.
        if self.session is not None:
            self.session.close()

    def list_subtitles(self, video, languages):
        """Return the subtitles listed on the release page of ``video``.

        :param video: video whose ``info_url`` points at the release page.
        :param languages: languages to keep; rows in any other language, or
            whose language name cannot be resolved, are skipped.
        :return: list of ``subtitle_class`` instances, possibly empty.
        :raises ProviderError: when the page layout is not the expected one.
        """
        if video.info_url is None or not video.info_url.startswith(self.server_url):
            logger.debug('%s not downloaded from %s. Skipped', video, self.server_url)
            return []

        html = self._query_info_url(video.info_url)

        if html is None:
            logger.debug('%s release page not found. Release might have been removed', video)
            return []

        release = self._parse_release_table(html)

        # A release without a "Subtitles" row (or with an empty one) simply
        # has no subtitles; that is not an error.
        subtitles_cell = release.get('Subtitles')
        if subtitles_cell is None or subtitles_cell.table is None:
            logger.debug('No subtitles found for %s', video)
            return []

        table_head = subtitles_cell.thead
        table_body = subtitles_cell.tbody
        if table_head is None or table_body is None:
            raise ProviderError('Unexpected HTML page layout - subtitles table has no header or body')

        subtitle_columns = [th.get_text() for th in table_head.find_all('th')]

        # The release title is the same for every row, so read it once.
        title_cell = release.get('Title')
        release_name = title_cell.get_text().strip() if title_cell is not None else ''

        subtitles = []
        for row in table_body.find_all('tr', recursive=False):
            subtitle_cols = self._parse_subtitle_row(row, subtitle_columns)

            language_cell = subtitle_cols.get('Language')
            download_cell = subtitle_cols.get('Download')
            if language_cell is None or download_cell is None or download_cell.a is None:
                logger.debug('Skipping subtitle row without language or download link')
                continue

            lang = lookup_lang(language_cell.get_text().strip())
            if lang not in languages:
                continue

            download_link = download_cell.a.get('href')
            if not download_link:
                logger.debug('Skipping subtitle row without download link')
                continue

            uploader_name = subtitle_cols['Uploader'].get_text().strip() if 'Uploader' in subtitle_cols else None

            subtitles.append(self.subtitle_class(
                provider_name=self.provider_name,
                page_link=video.info_url,
                download_link=download_link,
                language=lang,
                video=video,
                filename=download_link.split('/')[-1],
                release=release_name,
                uploader=uploader_name,
            ))

        return subtitles

    def _query_info_url(self, info_url):
        """Fetch a release page.

        :return: the page decoded as UTF-8 (undecodable bytes dropped), or
            ``None`` when the server answers 404.
        :raises requests.HTTPError: for any other error status
            (via ``raise_for_status``).
        """
        response = self.session.get(info_url, timeout=30)

        if response.status_code == 404:
            return None
        else:
            response.raise_for_status()

        return response.content.decode('utf-8', 'ignore')

    def _parse_subtitle_row(self, row, subtitle_columns):
        """Map column titles to the ``td`` cells of one subtitle table row.

        Cells and titles are paired by position; surplus cells or titles are
        ignored instead of raising ``IndexError``.
        """
        return dict(zip(subtitle_columns, row.find_all('td', recursive=False)))

    def _parse_release_table(self, html):
        """Parse the release data table into ``{label text: value cell}``.

        The label is the text of the first ``td`` of each row and the value
        is the row's second ``td`` element.

        :raises ProviderError: when the release data table is not found.
        """
        release_data_table = (ParserBeautifulSoup(html, ['html.parser'])
                              .select_one('#content-area > div.block > div.table-responsive > table > tbody'))

        if release_data_table is None:
            raise ProviderError('Unexpected HTML page layout - no release data table found')

        rows = {}
        for tr in release_data_table.find_all('tr', recursive=False):
            label_cell = tr.td
            if label_cell is None:
                # Row without any data cell; nothing to record.
                continue
            rows[label_cell.get_text()] = tr.select_one('td:nth-child(2)', recursive=False)
        return rows

    def download_subtitle(self, subtitle):
        """Download ``subtitle`` and store the result in ``subtitle.content``.

        ``.zip`` / ``.rar`` files are unpacked with
        ``get_subtitle_from_archive``; any other file is stored as received.

        :raises requests.HTTPError: when the server answers with an error
            status (via ``raise_for_status``).
        """
        # Same timeout as the release page request, so a stalled server
        # cannot block the caller forever.
        response = self.session.get(subtitle.download_link, timeout=30)
        response.raise_for_status()

        if subtitle.filename.lower().endswith((".zip", ".rar")):
            archive = get_archive_from_bytes(response.content)
            # Fall back to None for video objects without an ``episode``
            # attribute instead of raising AttributeError.
            subtitle.content = get_subtitle_from_archive(
                archive, episode=getattr(subtitle.video, 'episode', None)
            )
        else:
            subtitle.content = response.content
|