2021-01-25 19:28:55 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import datetime
|
|
|
|
|
|
|
|
from requests import Session, ConnectionError, Timeout, ReadTimeout
|
|
|
|
from subzero.language import Language
|
|
|
|
|
|
|
|
from babelfish import language_converters
|
|
|
|
from subliminal import Episode, Movie
|
|
|
|
from subliminal.score import get_equivalent_release_groups
|
|
|
|
from subliminal.utils import sanitize_release_group, sanitize
|
2021-01-26 03:05:56 +00:00
|
|
|
from subliminal_patch.exceptions import TooManyRequests
|
2021-01-25 19:28:55 +00:00
|
|
|
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, ServiceUnavailable, \
|
|
|
|
ProviderError
|
|
|
|
from .mixins import ProviderRetryMixin
|
2021-06-08 22:44:30 +00:00
|
|
|
from subliminal_patch.subtitle import Subtitle
|
2021-01-25 19:28:55 +00:00
|
|
|
from subliminal.subtitle import fix_line_ending, SUBTITLE_EXTENSIONS
|
|
|
|
from subliminal_patch.providers import Provider
|
2021-06-08 22:44:30 +00:00
|
|
|
from subliminal_patch.subtitle import guess_matches
|
2021-03-11 15:23:00 +00:00
|
|
|
from subliminal_patch.utils import fix_inconsistent_naming
|
2021-01-25 19:28:55 +00:00
|
|
|
from subliminal.cache import region
|
2021-05-27 12:49:52 +00:00
|
|
|
from dogpile.cache.api import NO_VALUE
|
2021-01-25 19:28:55 +00:00
|
|
|
from guessit import guessit
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Cache lifetimes, expressed in seconds (float, as returned by total_seconds()).
# Title-id lookups are kept for one week, the login token for twelve hours.
SHOW_EXPIRATION_TIME = datetime.timedelta(days=7).total_seconds()
TOKEN_EXPIRATION_TIME = datetime.timedelta(minutes=12 * 60).total_seconds()
|
2021-01-25 19:28:55 +00:00
|
|
|
|
|
|
|
|
2021-03-11 15:23:00 +00:00
|
|
|
def fix_tv_naming(title):
    """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them.

    The title is passed to ``fix_inconsistent_naming`` together with the
    table of known show-name exceptions declared below.

    :param str title: original title.
    :return: new title.
    :rtype: str

    """
    # Known spellings that differ between the local library and the provider.
    known_exceptions = {"Superman & Lois": "Superman and Lois"}
    return fix_inconsistent_naming(title, known_exceptions, True)
|
|
|
|
|
|
|
|
|
|
|
|
def fix_movie_naming(title):
    """Fix movie titles with inconsistent naming using dictionary, but do not sanitize them.

    The exception table is currently empty, so no movie-specific
    replacement is supplied to ``fix_inconsistent_naming``.

    :param str title: original title.
    :return: new title.
    :rtype: str

    """
    # No movie-specific exceptions are known yet; add entries here when needed.
    known_exceptions = {}
    return fix_inconsistent_naming(title, known_exceptions, True)
|
|
|
|
|
|
|
|
|
2021-01-25 19:28:55 +00:00
|
|
|
class OpenSubtitlesComSubtitle(Subtitle):
    """Subtitle entry built from one OpenSubtitles.com search result."""
    provider_name = 'opensubtitlescom'
    hash_verifiable = False

    def __init__(self, language, hearing_impaired, page_link, file_id, releases, uploader, title, year,
                 hash_matched, hash=None, season=None, episode=None):
        """Store the attributes of a search result.

        :param language: language of the subtitle.
        :param hearing_impaired: hearing-impaired flag reported by the provider.
        :param page_link: URL of the subtitle page.
        :param file_id: provider identifier of the subtitle file (also used as ``id``).
        :param releases: release name(s) the subtitle was made for.
        :param uploader: name of the uploader.
        :param title: title of the movie or show the subtitle belongs to.
        :param year: year of the movie or show.
        :param hash_matched: whether the provider reported a file-hash match.
        :param hash: hash associated with the subtitle, if any.
        :param season: season number (episodes only).
        :param episode: episode number (episodes only).
        """
        # what the subtitle is for
        self.title = title
        self.year = year
        self.season = season
        self.episode = episode

        # release information (the same value is exposed under both names)
        self.releases = releases
        self.release_info = releases

        # subtitle properties
        self.language = language
        self.hearing_impaired = hearing_impaired
        self.file_id = file_id
        self.page_link = page_link
        self.download_link = None  # filled in by the provider at download time
        self.uploader = uploader
        self.matches = None  # filled in by get_matches()
        self.hash = hash
        self.encoding = 'utf-8'
        self.hash_matched = hash_matched

    @property
    def id(self):
        """Identifier of the subtitle: the provider's file id."""
        return self.file_id

    def get_matches(self, video):
        """Compute which properties of ``video`` this subtitle matches.

        The result is also stored in ``self.matches``.

        :param video: the video (movie or episode) to compare against.
        :return: names of the matching properties.
        :rtype: set
        """
        is_movie = isinstance(video, Movie)
        type_ = "movie" if is_movie else "episode"

        # The search was done by title/series id, so that match is always granted.
        found = {'title'} if is_movie else {'series'}

        if video.year == self.year:
            found.add('year')

        # season and episode numbers only make sense for episodes
        if not is_movie:
            if video.season == self.season:
                found.add('season')
            if video.episode == self.episode:
                found.add('episode')

        # release group: any equivalent group name appearing in our release string
        if video.release_group and self.releases:
            own_release = sanitize_release_group(self.releases)
            wanted_groups = get_equivalent_release_groups(sanitize_release_group(video.release_group))
            if any(group in own_release for group in wanted_groups):
                found.add('release_group')

        if self.hash_matched:
            found.add('hash')

        # everything else is guessed from the release name
        guessed = guessit(self.releases, {"type": type_})
        found |= guess_matches(video, guessed)

        self.matches = found
        return found
|
|
|
|
|
|
|
|
|
|
|
|
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """OpenSubtitlesCom Provider.

    Searches and downloads subtitles through the REST API rooted at
    ``server_url``. The login token is shared through the cache ``region``
    under the key ``"oscom_token"``.
    """
    server_url = 'https://api.opensubtitles.com/api/v1/'

    languages = {Language.fromopensubtitles(code) for code in language_converters['szopensubtitles'].codes}
    # also advertise the "forced" variant of every supported language
    languages.update({Language.rebuild(lang, forced=True) for lang in languages})

    def __init__(self, username=None, password=None, use_hash=True, api_key=None):
        """Validate the configuration and prepare the HTTP session.

        :param username: account user name (required).
        :param password: account password (required).
        :param bool use_hash: whether the video hash is sent with subtitle searches.
        :param api_key: API key sent in the ``Api-Key`` header (required).
        :raise ConfigurationError: if the credentials or the API key are missing.
        """
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        self.session = Session()
        self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
                                'Api-Key': api_key,
                                'Content-Type': 'application/json'}
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        """Load the cached token, logging in when the cache holds none."""
        self.token = region.get("oscom_token", expiration_time=TOKEN_EXPIRATION_TIME)
        if self.token is NO_VALUE:
            self.login()

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def _has_token(self):
        """Return True when a usable authentication token is held."""
        return self.token is not NO_VALUE and bool(self.token)

    @staticmethod
    def _raise_for_status(response, check_download_limit=False):
        """Translate the provider's error status codes into exceptions.

        :param response: the HTTP response to inspect.
        :param bool check_download_limit: also treat 406 as a reached download limit.
        :raise TooManyRequests: on status 429.
        :raise DownloadLimitExceeded: on status 406, when ``check_download_limit`` is set.
        :raise ProviderError: on status 503.
        """
        if response.status_code == 429:
            raise TooManyRequests()
        if check_download_limit and response.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        if response.status_code == 503:
            raise ProviderError(response.reason)

    def login(self):
        """Authenticate and cache the returned token.

        :return: ``True`` when the login succeeded.
        :rtype: bool
        :raise ServiceUnavailable: if the request could not be completed.
        :raise AuthenticationError: on status 401.
        :raise TooManyRequests: on status 429.
        :raise ProviderError: on status 503, any other unexpected status, or invalid JSON.
        """
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={"username": self.username, "password": self.password},
                                  allow_redirects=False,
                                  timeout=30)
        except (ConnectionError, Timeout, ReadTimeout) as error:
            # No response object exists here, so report the transport error itself.
            raise ServiceUnavailable('Unknown Error, empty response: %r' % (error,))

        if r.status_code == 200:
            try:
                self.token = r.json()['token']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            region.set("oscom_token", self.token)
            return True

        if r.status_code == 401:
            raise AuthenticationError('Login failed: {}'.format(r.reason))
        self._raise_for_status(r)
        raise ProviderError('Bad status code: {}'.format(r.status_code))

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Find the provider's feature id for ``title``.

        The search uses the IMDB id of ``self.video`` when it has one and
        falls back to a text query on ``title`` otherwise. Results are cached
        by the decorator for ``SHOW_EXPIRATION_TIME`` seconds.

        NOTE(review): the lookup also depends on ``self.video``, which is
        presumably not part of the cache key -- confirm.

        :param str title: movie title or series name.
        :return: the id of the first matching feature, or ``None``.
        :raise TooManyRequests: on status 429.
        :raise ProviderError: on status 503 or invalid JSON.
        """
        title_id = None
        imdb_id = None

        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logger.debug('Searching using this IMDB id: %s', imdb_id)
        else:
            parameters = {'query': title}
            logger.debug('Searching using this title: %s', title)

        features_url = self.server_url + 'features'
        results = self.session.get(features_url, params=parameters, timeout=30)

        if results.status_code == 401:
            # Drop the cached token, log in again and retry once.
            logger.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()
            results = self.session.get(features_url, params=parameters, timeout=30)

        self._raise_for_status(results)

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # use the naming table that matches the kind of video
        fix_naming = fix_movie_naming if isinstance(self.video, Movie) else fix_tv_naming
        wanted_title = fix_naming(title).lower()

        # loop over results
        for result in results_dict:
            attributes = result['attributes']
            if 'title' not in attributes:
                continue
            if wanted_title != attributes['title'].lower():
                continue
            if not self.video.year or self.video.year == int(attributes['year']):
                title_id = result['id']
                break

        if title_id:
            logger.debug('Found this title ID: %s', title_id)
            return title_id

        logger.debug('No match found for %s', title)
        return None

    def query(self, languages, video):
        """Search subtitles for ``video`` in the given ``languages``.

        :param languages: languages to search for.
        :param video: the movie or episode to search subtitles for.
        :return: the subtitles found; empty when the title is unknown.
        :rtype: list
        :raise TooManyRequests: on status 429.
        :raise ProviderError: on status 503 or invalid JSON.
        """
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            logger.debug('Searching using this hash: %s', file_hash)
        else:
            file_hash = None

        is_episode = isinstance(self.video, Episode)
        title = self.video.series if is_episode else self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []

        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logger.debug('Searching for this languages: %s', lang_strings)

        # query the server
        if is_episode:
            params = {'parent_feature_id': title_id,
                      'languages': langs,
                      'episode_number': self.video.episode,
                      'season_number': self.video.season,
                      'moviehash': file_hash}
        else:
            params = {'id': title_id,
                      'languages': langs,
                      'moviehash': file_hash}
        res = self.session.get(self.server_url + 'subtitles', params=params, timeout=30)

        self._raise_for_status(res)

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        logger.debug('Query returned %s subtitles', len(result['data']))

        subtitles = []
        for item in result['data']:
            attributes = item['attributes']
            details = attributes['feature_details']

            # entries without any file cannot be downloaded
            if not attributes['files']:
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=details['movie_name'],
                year=details['year'],
                season=details.get('season_number'),
                episode=details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False)
            )
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found by :meth:`query` for ``video``."""
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Request a download link for ``subtitle`` and fetch its content.

        On success ``subtitle.download_link`` and ``subtitle.content`` are
        set. An empty download is only logged.

        :param subtitle: the subtitle to download.
        :raise TooManyRequests: on status 429.
        :raise DownloadLimitExceeded: on status 406.
        :raise ProviderError: on status 503, invalid JSON, or when no token can be obtained.
        """
        if not self._has_token():
            logger.debug("No cached token, we'll try to login again.")
            self.login()
        if not self._has_token():
            logger.debug("Unable to obtain an authentication token right now, we'll try again later.")
            raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json',
                   'Authorization': 'Bearer ' + self.token}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=30)
        self._raise_for_status(res, check_download_limit=True)

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=30)
        # check the file download itself, not the earlier link request
        self._raise_for_status(r, check_download_limit=True)

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
|