bazarr/libs/subliminal_patch/providers/opensubtitles.py

444 lines
18 KiB
Python
Raw Normal View History

2018-10-31 16:08:29 +00:00
# coding=utf-8
2019-09-17 02:04:27 +00:00
from __future__ import absolute_import
2018-10-31 16:08:29 +00:00
import base64
import logging
import os
import traceback
2020-10-18 17:09:25 +00:00
import re
2018-10-31 16:08:29 +00:00
import zlib
import time
import requests
from babelfish import language_converters
from dogpile.cache.api import NO_VALUE
from guessit import guessit
2018-10-31 16:08:29 +00:00
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
2019-04-28 04:02:12 +00:00
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
2019-09-17 02:04:27 +00:00
from .mixins import ProviderRetryMixin
2018-10-31 16:08:29 +00:00
from subliminal.subtitle import fix_line_ending
from subliminal_patch.providers import reinitialize_on_error
2018-10-31 16:08:29 +00:00
from subliminal_patch.http import SubZeroRequestsTransport
from subliminal_patch.utils import sanitize, fix_inconsistent_naming
2018-10-31 16:08:29 +00:00
from subliminal.cache import region
from subliminal_patch.score import framerate_equal
from subliminal_patch.subtitle import guess_matches
2018-10-31 16:08:29 +00:00
from subzero.language import Language
from ..exceptions import TooManyRequests, APIThrottled
logger = logging.getLogger(__name__)
def fix_tv_naming(title):
"""Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them.
:param str title: original title.
:return: new title.
:rtype: str
"""
return fix_inconsistent_naming(title, {"Superman & Lois": "Superman and Lois",
}, True)
def fix_movie_naming(title):
return fix_inconsistent_naming(title, {
}, True)
2018-10-31 16:08:29 +00:00
class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
hash_verifiable = True
hearing_impaired_verifiable = True
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, query_parameters,
filename, encoding, fps, skip_wrong_fps=True):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, subtitle_id,
matched_by, movie_kind, hash,
movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
self.query_parameters = query_parameters or {}
self.fps = fps
self.release_info = movie_release_name
self.wrong_fps = False
self.skip_wrong_fps = skip_wrong_fps
self.movie_imdb_id = movie_imdb_id
2018-10-31 16:08:29 +00:00
def get_fps(self):
try:
return float(self.fps)
except:
return None
2018-10-31 16:08:29 +00:00
def get_matches(self, video, hearing_impaired=False):
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
type_ = "episode" if isinstance(video, Episode) else "movie"
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': type_}))
matches |= guess_matches(video, guessit(self.filename, {'type': type_}))
# episode
if type_ == "episode" and self.movie_kind == "episode":
# series
if fix_tv_naming(video.series) and (sanitize(self.series_name) in (
sanitize(name) for name in [fix_tv_naming(video.series)] + video.alternative_series)):
matches.add('series')
# movie
elif type_ == "movie" and self.movie_kind == "movie":
# title
if fix_movie_naming(video.title) and (sanitize(self.movie_name) in (
sanitize(name) for name in [fix_movie_naming(video.title)] + video.alternative_titles)):
matches.add('title')
2018-10-31 16:08:29 +00:00
sub_fps = None
try:
sub_fps = float(self.fps)
except ValueError:
pass
# video has fps info, sub also, and sub's fps is greater than 0
if video.fps and sub_fps and not framerate_equal(video.fps, self.fps):
self.wrong_fps = True
if self.skip_wrong_fps:
logger.debug("Wrong FPS (expected: %s, got: %s, lowering score massively)", video.fps, self.fps)
# fixme: may be too harsh
return set()
else:
logger.debug("Wrong FPS (expected: %s, got: %s, continuing)", video.fps, self.fps)
# matched by tag?
if self.matched_by == "tag":
# treat a tag match equally to a hash match
logger.debug("Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
self.query_parameters.get("tag", None))
matches.add("hash")
# imdb_id match so we'll consider year as matching
if self.movie_imdb_id and video.imdb_id and (self.movie_imdb_id == video.imdb_id):
matches.add("year")
2018-10-31 16:08:29 +00:00
return matches
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
only_foreign = False
also_foreign = False
subtitle_class = OpenSubtitlesSubtitle
hash_verifiable = True
hearing_impaired_verifiable = True
skip_wrong_fps = True
is_vip = False
use_ssl = True
timeout = 15
default_url = "//api.opensubtitles.org/xml-rpc"
vip_url = "//vip-api.opensubtitles.org/xml-rpc"
languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
languages.update(set(Language.rebuild(l, forced=True) for l in languages))
2020-09-10 18:26:37 +00:00
languages.update(set(Language.rebuild(l, hi=True) for l in languages))
2018-10-31 16:08:29 +00:00
video_types = (Episode, Movie)
2018-10-31 16:08:29 +00:00
def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False, also_foreign=False,
skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username or ''
self.password = password or ''
self.use_tag_search = use_tag_search
self.only_foreign = only_foreign
self.also_foreign = also_foreign
self.skip_wrong_fps = skip_wrong_fps
self.token = None
self.is_vip = is_vip
self.use_ssl = use_ssl
self.timeout = timeout
logger.debug("Using timeout: %d", timeout)
if use_ssl:
logger.debug("Using HTTPS connection")
self.default_url = ("https:" if use_ssl else "http:") + self.default_url
self.vip_url = ("https:" if use_ssl else "http:") + self.vip_url
if use_tag_search:
logger.info("Using tag/exact filename search")
if only_foreign:
logger.info("Only searching for foreign/forced subtitles")
def get_server_proxy(self, url, timeout=None):
return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout,
user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")))
2018-10-31 16:08:29 +00:00
def log_in_url(self, server_url):
self.token = None
self.server = self.get_server_proxy(server_url)
2018-10-31 16:08:29 +00:00
response = self.retry(
lambda: checked(
lambda: self.server.LogIn(self.username, self.password, 'eng',
os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))
2018-10-31 16:08:29 +00:00
)
)
self.token = response['token']
logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))
2019-09-19 01:50:20 +00:00
region.set("os_token", bytearray(self.token, encoding='utf-8'))
region.set("os_server_url", bytearray(server_url, encoding='utf-8'))
def log_in(self):
logger.info('Logging in')
try:
self.log_in_url(self.vip_url if self.is_vip else self.default_url)
except Unauthorized:
if self.is_vip:
logger.info("VIP server login failed, falling back")
try:
self.log_in_url(self.default_url)
except Unauthorized:
pass
if not self.token:
logger.error("Login failed, please check your credentials")
raise Unauthorized
2018-10-31 16:08:29 +00:00
def use_token_or_login(self, func):
if not self.token:
self.log_in()
return func()
try:
return func()
except Unauthorized:
self.log_in()
return func()
def initialize(self):
token_cache = region.get("os_token")
url_cache = region.get("os_server_url")
2018-10-31 16:08:29 +00:00
if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
self.token = token_cache.decode("utf-8")
self.server = self.get_server_proxy(url_cache.decode("utf-8"))
logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10))
else:
self.server = None
self.token = None
2018-10-31 16:08:29 +00:00
def terminate(self):
self.server = None
self.token = None
2018-10-31 16:08:29 +00:00
def list_subtitles(self, video, languages):
"""
:param video:
:param languages:
:return:
patch: query movies even if hash is known; add tag parameter
"""
season = episode = None
if isinstance(video, Episode):
query = [video.series] + video.alternative_series
2018-10-31 16:08:29 +00:00
season = video.season
episode = episode = min(video.episode) if isinstance(video.episode, list) else video.episode
if video.is_special:
season = None
episode = None
query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
2018-10-31 16:08:29 +00:00
logger.info("%s: Searching for special: %r", self.__class__, query)
# elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
# query = video.name.split(os.sep)[-1]
else:
query = [video.title] + video.alternative_titles
2018-10-31 16:08:29 +00:00
2020-11-05 01:41:21 +00:00
if isinstance(video, Episode):
imdb_id = video.series_imdb_id
else:
imdb_id = video.imdb_id
return self.query(video, languages, hash=video.hashes.get('opensubtitles'), size=video.size,
2020-11-05 01:41:21 +00:00
imdb_id=imdb_id, query=query, season=season, episode=episode, tag=video.original_name,
2018-10-31 16:08:29 +00:00
use_tag_search=self.use_tag_search, only_foreign=self.only_foreign,
also_foreign=self.also_foreign)
@reinitialize_on_error((NoSession, Unauthorized, OpenSubtitlesError, ServiceUnavailable), attempts=1)
def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
2018-10-31 16:08:29 +00:00
# fill the search criteria
criteria = []
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if use_tag_search and tag:
criteria.append({'tag': tag})
if imdb_id:
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
2020-11-05 01:41:21 +00:00
# Commented out after the issue with episode released after October 17th 2020.
# if query and season and episode:
# for q in query:
# criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
# elif query:
# for q in query:
# criteria.append({'query': q.replace('\'', '')})
2018-10-31 16:08:29 +00:00
if not criteria:
raise ValueError('Not enough information')
# add the language
for criterion in criteria:
criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))
# query the server
logger.info('Searching subtitles %r', criteria)
response = self.use_token_or_login(
lambda: self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))
)
subtitles = []
# exit if no data
if not response['data']:
logger.info('No subtitles found')
return subtitles
# loop over subtitle items
for subtitle_item in response['data']:
_subtitle_item = subtitle_item
# in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
if hasattr(_subtitle_item, "startswith"):
_subtitle_item = response["data"][subtitle_item]
# read the item
language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
page_link = _subtitle_item['SubtitlesLink']
subtitle_id = int(_subtitle_item['IDSubtitleFile'])
matched_by = _subtitle_item['MatchedBy']
movie_kind = _subtitle_item['MovieKind']
hash = _subtitle_item['MovieHash']
movie_name = _subtitle_item['MovieName']
movie_release_name = _subtitle_item['MovieReleaseName']
movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
if season or episode:
movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
else:
movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
2018-10-31 16:08:29 +00:00
movie_fps = _subtitle_item.get('MovieFPS')
series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
filename = _subtitle_item['SubFileName']
encoding = _subtitle_item.get('SubEncoding') or None
foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))
# foreign/forced subtitles only wanted
if only_foreign and not foreign_parts_only:
continue
# foreign/forced not wanted
elif not only_foreign and not also_foreign and foreign_parts_only:
continue
# set subtitle language to forced if it's foreign_parts_only
elif (also_foreign or only_foreign) and foreign_parts_only:
2018-10-31 16:08:29 +00:00
language = Language.rebuild(language, forced=True)
2020-09-10 18:26:37 +00:00
# set subtitle language to hi if it's hearing_impaired
if hearing_impaired:
language = Language.rebuild(language, hi=True)
if language not in languages:
continue
2020-10-18 17:09:25 +00:00
if video.imdb_id and (movie_imdb_id != re.sub("(?<![^a-zA-Z])0+","", video.imdb_id)):
continue
2018-10-31 16:08:29 +00:00
query_parameters = _subtitle_item.get("QueryParameters")
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, query_parameters, filename, encoding,
movie_fps, skip_wrong_fps=self.skip_wrong_fps)
subtitle.uploader = _subtitle_item['UserNickName'] if _subtitle_item['UserNickName'] else 'anonymous'
2018-10-31 16:08:29 +00:00
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
return subtitles
@reinitialize_on_error((NoSession, Unauthorized, OpenSubtitlesError, ServiceUnavailable), attempts=1)
2018-10-31 16:08:29 +00:00
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
response = self.use_token_or_login(
lambda: checked(
lambda: self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)])
)
)
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
def checked(fn, raise_api_limit=False):
"""Run :fn: and check the response status before returning it.
:param fn: the function to make an XMLRPC call to OpenSubtitles.
:return: the response.
:raise: :class:`OpenSubtitlesError`
"""
response = None
try:
try:
response = fn()
except APIThrottled:
if not raise_api_limit:
logger.info("API request limit hit, waiting and trying again once.")
time.sleep(12)
return checked(fn, raise_api_limit=True)
raise
except requests.RequestException as e:
status_code = e.response.status_code
else:
status_code = int(response['status'][:3])
except:
status_code = None
if status_code == 401:
raise Unauthorized
if status_code == 406:
raise NoSession
if status_code == 407:
raise DownloadLimitReached
if status_code == 413:
raise InvalidImdbid
if status_code == 414:
raise UnknownUserAgent
if status_code == 415:
raise DisabledUserAgent
if status_code == 429:
if not raise_api_limit:
raise TooManyRequests
else:
raise APIThrottled
if status_code == 503:
raise ServiceUnavailable(str(status_code))
if status_code != 200:
if response and "status" in response:
raise OpenSubtitlesError(response['status'])
raise ServiceUnavailable("Unknown Error, empty response: %s: %r" % (status_code, response))
return response