1
0
Fork 0
mirror of https://github.com/morpheus65535/bazarr synced 2024-12-27 01:57:33 +00:00

Improved opensubtitles.com resilience and added proper handling of Invalid JSON and Bad Gateway responses.

This commit is contained in:
morpheus65535 2022-01-28 15:56:04 -05:00
parent 63f3454c8f
commit 669bd3376a

View file

@ -5,13 +5,14 @@ import time
import datetime import datetime
from requests import Session, ConnectionError, Timeout, ReadTimeout, RequestException from requests import Session, ConnectionError, Timeout, ReadTimeout, RequestException
from requests.exceptions import JSONDecodeError
from subzero.language import Language from subzero.language import Language
from babelfish import language_converters from babelfish import language_converters
from subliminal import Episode, Movie from subliminal import Episode, Movie
from subliminal.score import get_equivalent_release_groups from subliminal.score import get_equivalent_release_groups
from subliminal.utils import sanitize_release_group, sanitize from subliminal.utils import sanitize_release_group, sanitize
from subliminal_patch.exceptions import TooManyRequests from subliminal_patch.exceptions import TooManyRequests, APIThrottled
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, ServiceUnavailable, \ from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, ServiceUnavailable, \
ProviderError ProviderError
from .mixins import ProviderRetryMixin from .mixins import ProviderRetryMixin
@ -29,6 +30,8 @@ logger = logging.getLogger(__name__)
SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds() SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
TOKEN_EXPIRATION_TIME = datetime.timedelta(hours=12).total_seconds() TOKEN_EXPIRATION_TIME = datetime.timedelta(hours=12).total_seconds()
retry_amount=5
def fix_tv_naming(title): def fix_tv_naming(title):
"""Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them.
@ -166,15 +169,14 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
lambda: self.session.post(self.server_url + 'login', lambda: self.session.post(self.server_url + 'login',
json={"username": self.username, "password": self.password}, json={"username": self.username, "password": self.password},
allow_redirects=False, allow_redirects=False,
timeout=30) timeout=30),
) validate_json=True,
json_key_name='token'
),
amount=retry_amount
) )
try:
self.token = r.json()['token'] self.token = r.json()['token']
except ValueError:
raise ProviderError('Invalid JSON returned by provider')
else:
region.set("oscom_token", self.token) region.set("oscom_token", self.token)
return return
@ -190,13 +192,16 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
title_id = None title_id = None
parameters = {'query': title.lower()} parameters = {'query': title.lower()}
logging.debug('Searching using this title: {}'.format(title)) logging.debug(f'Searching using this title: {title}')
results = self.retry( results = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30), lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30),
validate_token=True validate_token=True,
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
if results == 401: if results == 401:
@ -206,16 +211,16 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
results = self.retry( results = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30) lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
# deserialize results # deserialize results
try:
results_dict = results.json()['data'] results_dict = results.json()['data']
except ValueError:
raise ProviderError('Invalid JSON returned by provider')
else:
# loop over results # loop over results
for result in results_dict: for result in results_dict:
if 'title' in result['attributes']: if 'title' in result['attributes']:
@ -233,17 +238,17 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
continue continue
if title_id: if title_id:
logging.debug('Found this title ID: {}'.format(title_id)) logging.debug(f'Found this title ID: {title_id}')
return self.sanitize_external_ids(title_id) return self.sanitize_external_ids(title_id)
finally:
if not title_id: if not title_id:
logger.debug('No match found for {}'.format(title)) logger.debug(f'No match found for {title}')
def query(self, languages, video): def query(self, languages, video):
self.video = video self.video = video
if self.use_hash: if self.use_hash:
file_hash = self.video.hashes.get('opensubtitlescom') file_hash = self.video.hashes.get('opensubtitlescom')
logging.debug('Searching using this hash: {}'.format(hash)) logging.debug(f'Searching using this hash: {hash}')
else: else:
file_hash = None file_hash = None
@ -275,7 +280,7 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
forced = 'exclude' forced = 'exclude'
langs = ','.join(lang_strings) langs = ','.join(lang_strings)
logging.debug('Searching for this languages: {}'.format(lang_strings)) logging.debug(f'Searching for this languages: {lang_strings}')
# query the server # query the server
if isinstance(self.video, Episode): if isinstance(self.video, Episode):
@ -289,8 +294,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id), ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
('season_number', self.video.season), ('season_number', self.video.season),
('query', os.path.basename(self.video.name))), ('query', os.path.basename(self.video.name))),
timeout=30) timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
else: else:
res = self.retry( res = self.retry(
@ -301,20 +309,18 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
('languages', langs.lower()), ('languages', langs.lower()),
('moviehash', file_hash), ('moviehash', file_hash),
('query', os.path.basename(self.video.name))), ('query', os.path.basename(self.video.name))),
timeout=30) timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
subtitles = [] subtitles = []
try:
result = res.json() result = res.json()
if 'data' not in result:
raise ValueError logging.debug(f"Query returned {len(result['data'])} subtitles")
except ValueError:
raise ProviderError('Invalid JSON returned by provider')
else:
logging.debug('Query returned {} subtitles'.format(len(result['data'])))
if len(result['data']): if len(result['data']):
for item in result['data']: for item in result['data']:
@ -373,39 +379,43 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
lambda: self.session.post(self.server_url + 'download', lambda: self.session.post(self.server_url + 'download',
json={'file_id': subtitle.file_id, 'sub_format': 'srt'}, json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
headers=headers, headers=headers,
timeout=30) timeout=30),
) validate_json=True,
json_key_name='link'
),
amount=retry_amount
) )
try:
download_data = res.json() download_data = res.json()
except ValueError:
raise ProviderError('Invalid JSON returned by provider')
else:
if 'link' not in download_data:
return False
subtitle.download_link = download_data['link'] subtitle.download_link = download_data['link']
r = self.retry( r = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(subtitle.download_link, timeout=30) lambda: self.session.get(subtitle.download_link, timeout=30),
) validate_content=True
),
amount=retry_amount
) )
subtitle_content = r.content if not r:
logger.debug(f'Could not download subtitle from {subtitle.download_link}')
if subtitle_content: subtitle.content = None
subtitle.content = fix_line_ending(subtitle_content) return
else: else:
logger.debug('Could not download subtitle from {}'.format(subtitle.download_link)) subtitle_content = r.content
subtitle.content = fix_line_ending(subtitle_content)
def checked(fn, validate_token=False): def checked(fn, raise_api_limit=False, validate_token=False, validate_json=False, json_key_name=None,
validate_content=False):
"""Run :fn: and check the response status before returning it. """Run :fn: and check the response status before returning it.
:param fn: the function to make an API call to OpenSubtitles.com. :param fn: the function to make an API call to OpenSubtitles.com.
:param raise_api_limit: if True we wait a little bit longer before running the call again.
:param validate_token: test if token is valid and return 401 if not. :param validate_token: test if token is valid and return 401 if not.
:param validate_json: test if response is valid json.
:param json_key_name: test if returned json contain a specific key.
:param validate_content: test if response have a content (used with download).
:return: the response. :return: the response.
""" """
@ -413,12 +423,19 @@ def checked(fn, validate_token=False):
try: try:
try: try:
response = fn() response = fn()
except APIThrottled:
if not raise_api_limit:
logger.info("API request limit hit, waiting and trying again once.")
time.sleep(2)
return checked(fn, raise_api_limit=True)
raise
except (ConnectionError, Timeout, ReadTimeout): except (ConnectionError, Timeout, ReadTimeout):
raise ServiceUnavailable('Unknown Error, empty response: %s: %r' % (response.status_code, response)) raise ServiceUnavailable(f'Unknown Error, empty response: {response.status_code}: {response}')
except RequestException as e: except Exception:
status_code = e.response.status_code logging.exception('Unhandled exception raised.')
raise ProviderError('Unhandled exception raised. Check log.')
else: else:
status_code = int(response['status'][:3]) status_code = response.status_code
except Exception: except Exception:
status_code = None status_code = None
else: else:
@ -426,15 +443,34 @@ def checked(fn, validate_token=False):
if validate_token: if validate_token:
return 401 return 401
else: else:
raise AuthenticationError('Login failed: {}'.format(response.reason)) raise AuthenticationError(f'Login failed: {response.reason}')
elif status_code == 406: elif status_code == 406:
raise DownloadLimitExceeded("Daily download limit reached") raise DownloadLimitExceeded("Daily download limit reached")
elif status_code == 429: elif status_code == 429:
raise TooManyRequests() raise TooManyRequests()
elif status_code == 502:
raise APIThrottled()
elif 500 <= status_code <= 599: elif 500 <= status_code <= 599:
raise ProviderError(response.reason) raise ProviderError(response.reason)
if status_code != 200: if status_code != 200:
raise ProviderError('Bad status code: {}'.format(response.status_code)) raise ProviderError(f'Bad status code: {response.status_code}')
if validate_json:
try:
json_test = response.json()
except JSONDecodeError:
raise ProviderError('Invalid JSON returned by provider')
else:
if json_key_name not in json_test:
raise ProviderError(f'Invalid JSON returned by provider: no {json_key_name} key in returned json.')
if validate_content:
if not hasattr(response, 'content'):
logging.error('Download link returned no content attribute.')
return False
elif not response.content:
logging.error(f'This download link returned empty content: {response.url}')
return False
return response return response