bazarr/libs/subliminal_patch/providers/subscene.py

355 lines
14 KiB
Python
Raw Normal View History

2018-10-31 16:08:29 +00:00
# coding=utf-8
import io
import logging
import os
import time
2019-06-21 01:34:14 +00:00
import traceback
from urllib import parse
2019-06-21 01:34:14 +00:00
import requests
import inflect
2019-06-21 01:34:14 +00:00
import re
import json
2018-10-31 16:08:29 +00:00
2020-12-16 04:32:47 +00:00
import html
import zipfile
import rarfile
2018-10-31 16:08:29 +00:00
from babelfish import language_converters
from guessit import guessit
2019-06-21 01:34:14 +00:00
from dogpile.cache.api import NO_VALUE
2018-10-31 16:08:29 +00:00
from subliminal import Episode, ProviderError
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
2018-10-31 16:08:29 +00:00
from subliminal.utils import sanitize_release_group
2019-06-21 01:34:14 +00:00
from subliminal.cache import region
2019-04-28 04:02:12 +00:00
from subliminal_patch.http import RetryingCFSession
2018-10-31 16:08:29 +00:00
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.converters.subscene import language_ids, supported_languages
from subscene_api.subscene import search, SearchTypes, Subtitle as APISubtitle, SITE_DOMAIN
2018-10-31 16:08:29 +00:00
from subzero.language import Language
# Shared inflect engine; used further down in this module to turn a season number into
# ordinal words when building the series search term.
p = inflect.engine()
2018-10-31 16:08:29 +00:00
# Register the "subscene" converter entry point with babelfish's converter registry.
# NOTE(review): presumably this is what makes Language.fromsubscene() available - confirm.
language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class SubsceneSubtitle(Subtitle):
    """A single subtitle entry found on Subscene.

    The release name reported by the site is kept in both ``release_info`` and ``releases``;
    ``page_link`` doubles as the subtitle's identifier.
    """
    provider_name = 'subscene'
    hearing_impaired_verifiable = True
    is_pack = False
    page_link = None
    season = None
    episode = None
    releases = None

    def __init__(self, language, release_info, hearing_impaired=False, page_link=None, encoding=None, mods=None,
                 asked_for_release_group=None, asked_for_episode=None):
        """Store the release name and the release group / episode the caller was looking for."""
        super().__init__(language, hearing_impaired=hearing_impaired, page_link=page_link, encoding=encoding,
                         mods=mods)
        self.release_info = release_info
        self.releases = release_info
        self.asked_for_episode = asked_for_episode
        self.asked_for_release_group = asked_for_release_group
        # filled in by get_matches() for episode videos
        self.season = None
        self.episode = None

    @classmethod
    def from_api(cls, s):
        """Build a subtitle from a subscene_api subtitle object ``s``."""
        language = Language.fromsubscene(s.language.strip())
        return cls(language, s.title, hearing_impaired=s.hearing_impaired, page_link=s.url)

    @property
    def id(self):
        """The subtitle page link, used as the identifier."""
        return self.page_link

    @property
    def numeric_id(self):
        """Last path segment of the page link."""
        return self.page_link.split("/")[-1]

    def get_matches(self, video):
        """Compute, store (``self.matches``) and return the set of match names against ``video``."""
        found = set()

        # identical release name and video file name is treated as a hash match
        if self.release_info.strip() == get_video_filename(video):
            logger.debug("Using hash match as the release name is the same")
            found.add("hash")

        if isinstance(video, Episode):
            # episode
            guess = guessit(self.release_info, {'type': 'episode'})
            self.season, self.episode = guess.get("season"), guess.get("episode")
            found |= guess_matches(video, guess)

            if "season" in found and "episode" not in guess:
                # season matched but the release name carries no episode number: season pack
                found.add("episode")
                logger.debug("%r is a pack", self)
                self.is_pack = True

            if "title" in guess and "year" in found and video.series in guess['title']:
                found.add("series")
        else:
            # movie
            guess = guessit(self.release_info, {'type': 'movie'})
            found |= guess_matches(video, guess)

        # accept a release group that is contained in the guessed one, not only an exact match
        if (video.release_group and "release_group" not in found and "release_group" in guess
                and sanitize_release_group(video.release_group) in sanitize_release_group(guess["release_group"])):
            found.add("release_group")

        self.matches = found
        return found

    def get_download_link(self, session):
        """Resolve the archive download URL for this subtitle page using ``session``."""
        return APISubtitle.get_zipped_url(self.page_link, session)
def get_video_filename(video):
    """Return the file name of ``video.original_name`` without directory and extension."""
    file_name = os.path.basename(video.original_name)
    stem, _extension = os.path.splitext(file_name)
    return stem
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    Subscene provider.

    Searches SubScene by series name + season (episodes) or by title (movies). It doesn't open every found
    subtitle page to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    # NOTE(review): this aliases the imported ``supported_languages`` object, so the two updates below
    # modify it in place. Kept as is because other code may rely on the extended set.
    languages = supported_languages
    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
    # built after the forced variants were added, so those get a hi variant as well
    languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        """Store the credentials and the foreign-only flag.

        :raises ConfigurationError: if username or password is missing/empty.
        """
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        """Create the HTTP session and either restore cached cookies or log in."""
        logger.info("Creating session")
        self.session = RetryingCFSession()

        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)
        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        """Log in with the configured credentials and cache the resulting cookies.

        :raises ServiceUnavailable: if the login page contains "Server Error".
        :raises ProviderError: if the login page or the login response can't be parsed, or the final
            request fails.
        """
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        # the login page embeds its parameters (urls, anti-forgery token) as HTML-escaped JSON
        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.text)
        if not match:
            raise ProviderError("Something went wrong when trying to log in #1")

        data = json.loads(html.unescape(match.group(1)))
        login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
        # NOTE(review): short pause before posting the credentials; reason not visible here
        time.sleep(1.0)

        r = self.session.post(login_url,
                              {
                                  "username": self.username,
                                  "password": self.password,
                                  data["antiForgery"]["name"]: data["antiForgery"]["value"]
                              })
        # the response is expected to contain a form whose token fields have to be posted back to the site
        pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                r"state.+?value=\"(?P<state>.+?)\".+?"
                                r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                r.text, re.MULTILINE | re.DOTALL)
        if not pep_content:
            raise ProviderError("Something went wrong when trying to log in #1")

        r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
        try:
            r.raise_for_status()
        except Exception as exc:
            # interpolate the traceback into the message instead of passing it as a second exception argument
            raise ProviderError("Something went wrong when trying to log in: %s" % traceback.format_exc()) from exc

        # only keep the cookies listed below in the cache
        cj = self.session.cookies.copy()
        store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
        for cn in self.session.cookies.keys():
            if cn not in store_cks:
                del cj[cn]

        logger.debug("Storing cookies: %r", cj)
        region.set("subscene_cookies2", cj)

    def terminate(self):
        """Close the HTTP session, if one was created."""
        logger.info("Closing session")
        if self.session is not None:
            self.session.close()

    def _create_filters(self, languages):
        """Build the cookie filters for ``languages`` and push the account filters if they changed."""
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            # the account filter gets the lower-cased value
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for lang in languages:
            # look up by basename first, fall back to alpha3; languages without an id are skipped
            lid = language_ids.get(lang.basename, language_ids.get(lang.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(selected_ids)

        # only post the account filters when they differ from the last cached ones
        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False)

        logger.debug("Filter created: '%s'", self.filters)

    def _enable_filters(self):
        """Put the filters built by _create_filters() into the session cookies."""
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        """Return the found subtitles for ``video`` whose language is in ``languages``.

        Returns an empty list when the video's original release name is unknown.
        """
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    @staticmethod
    def _open_archive(data):
        """Open ``data`` (bytes) as a rar or zip archive; return None if it is neither."""
        # a fresh stream per call so no check depends on the read position left by a previous one
        if rarfile.is_rarfile(io.BytesIO(data)):
            logger.debug('Identified rar archive')
            return rarfile.RarFile(io.BytesIO(data))
        if zipfile.is_zipfile(io.BytesIO(data)):
            logger.debug('Identified zip archive')
            return zipfile.ZipFile(io.BytesIO(data))

        logger.error('Unsupported compressed format')
        return None

    def download_subtitle(self, subtitle):
        """Fill ``subtitle.content`` from previously stored pack data or from a fresh download.

        After a successful download the raw archive is kept in ``subtitle.pack_data`` for later re-use.
        Returns without setting content when the archive format is unsupported.
        """
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = self._open_archive(subtitle.pack_data)
            if archive is None:
                return

            subtitle.pack_data = None

            try:
                content = self.get_subtitle_from_archive(subtitle, archive)
            except ProviderError:
                # best effort only: fall through and download the archive again
                logger.debug("Couldn't get the subtitle from the stored pack data, downloading it again")
            else:
                subtitle.content = content
                return

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive = self._open_archive(r.content)
        if archive is None:
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert the subtitles of the search result ``film`` into SubsceneSubtitle objects."""
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                # entries that can't be converted are logged and skipped
                logger.info(e)
                continue

            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language, hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        """Call the subscene_api search; on an HTTP error drop the cached cookies and return None."""
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            logger.info("Search failed with an HTTP error, removing the cached cookies", exc_info=True)
            region.delete("subscene_cookies2")
            return None

    def query(self, video):
        """Search for ``video`` by title (plus at most one alternative title) and return all found subtitles."""
        subtitles = []
        if isinstance(video, Episode):
            titles = list(set([video.series] + video.alternative_series[:1]))
            more_than_one = len(titles) > 1
            for series in titles:
                # the search term spells the season out as an ordinal word followed by "Season"
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term, session=self.session, release=False, throttle=self.search_throttle,
                                      limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    # for the first season, retry with the bare series name
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series, session=self.session, release=False,
                                          throttle=self.search_throttle, limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title, year=video.year, session=self.session, limit_to=None, release=False,
                                      throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)

                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found", len(subtitles))
        return subtitles