From 040ddb236269c7a27d5d4f9c7fe708e53caba72f Mon Sep 17 00:00:00 2001 From: morpheus65535 Date: Fri, 11 Mar 2022 22:18:36 -0500 Subject: [PATCH] Improved throttling of LegendasDivx provider to prevent IP address blocking by reaching 150 searches a day. #1757 --- bazarr/get_providers.py | 10 ++++++- libs/subliminal_patch/exceptions.py | 6 ++++ .../providers/legendasdivx.py | 28 ++++++++++++++++++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index bf7efad33..bdcdc5d7b 100644 --- a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -13,7 +13,8 @@ from get_args import args from config import settings, get_array_from from event_handler import event_stream from utils import get_binary, blacklist_log, blacklist_log_movie -from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked, MustGetBlacklisted +from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked, \ + MustGetBlacklisted, SearchLimitReached from subliminal.providers.opensubtitles import DownloadLimitReached from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable from subliminal import region as subliminal_cache_region @@ -36,6 +37,11 @@ def time_until_end_of_day(dt=None): titulky_server_local_time = datetime.datetime.now(tz=pytz.timezone('Europe/Prague')).replace(tzinfo=None) titulky_limit_reset_datetime = time_until_end_of_day(dt=titulky_server_local_time) +# LegendasDivx reset its searches limit at approximately midnight, Lisbon time, everyday. +legendasdivx_server_local_time = datetime.datetime.now(tz=pytz.timezone('Europe/Lisbon')).replace(tzinfo=None) +legendasdivx_limit_reset_datetime = time_until_end_of_day(dt=legendasdivx_server_local_time) + \ + datetime.timedelta(minutes=15) + hours_until_end_of_day = time_until_end_of_day().seconds // 3600 + 1 VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled, @@ -79,6 +85,8 @@ PROVIDER_THROTTLE_MAP = { datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))), IPAddressBlocked: ( datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))), + SearchLimitReached: (legendasdivx_limit_reset_datetime, + f"{legendasdivx_limit_reset_datetime.seconds // 3600} hours"), } } diff --git a/libs/subliminal_patch/exceptions.py b/libs/subliminal_patch/exceptions.py index 8b931425a..d257520b5 100644 --- a/libs/subliminal_patch/exceptions.py +++ b/libs/subliminal_patch/exceptions.py @@ -25,6 +25,12 @@ class IPAddressBlocked(ProviderError): pass +class SearchLimitReached(ProviderError): + """Exception raised when maximum searches for a provider have been reached.""" + + pass + + class MustGetBlacklisted(ProviderError): def __init__(self, id: str, media_type: str): super().__init__() diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index e4a5ab292..d224f6a17 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -17,7 +17,7 @@ from subliminal.providers import ParserBeautifulSoup from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending from subliminal.utils import sanitize, sanitize_release_group from subliminal.video import Episode, Movie -from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked +from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked, SearchLimitReached from subliminal_patch.http import RetryingCFSession from subliminal_patch.providers import Provider, reinitialize_on_error from subliminal_patch.score import get_scores, framerate_equal @@ -121,6 +121,7 @@ class LegendasdivxProvider(Provider): languages = {Language('por', 'BR')} | {Language('por')} video_types = (Episode, Movie) SEARCH_THROTTLE = 8 + SAFE_SEARCH_LIMIT = 145 # real limit is 150, but we use 145 to keep a buffer and prevent IPAddressBlocked exception to be raised site = 'https://www.legendasdivx.pt' headers = { 'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"), @@ -294,10 +295,21 @@ class LegendasdivxProvider(Provider): try: # sleep for a 1 second before another request sleep(1) + searchLimitReached = False self.headers['Referer'] = self.site + '/index.php' self.session.headers.update(self.headers) res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) res.raise_for_status() + if res.status_code == 200 and "', res.text) + if searches_count_groups: + try: + searches_count = int(searches_count_groups.group(1)) + except TypeError: + pass + else: + if searches_count >= self.SAFE_SEARCH_LIMIT: + searchLimitReached = True if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): logger.warning('Legendasdivx.pt :: query %s return no results!', querytext) # for series, if no results found, try again just with series and season (subtitle packs) @@ -308,6 +320,16 @@ class LegendasdivxProvider(Provider): sleep(1) res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) res.raise_for_status() + if res.status_code == 200 and "', res.text) + if searches_count_groups: + try: + searches_count = int(searches_count_groups.group(1)) + except TypeError: + pass + else: + if searches_count >= self.SAFE_SEARCH_LIMIT: + searchLimitReached = True if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): logger.warning('Legendasdivx.pt :: query {0} return no results for language {1}(for series and season only).'.format(querytext, language_id)) continue @@ -331,6 +353,10 @@ class LegendasdivxProvider(Provider): logger.error("LegendasDivx.pt :: Uncaught error: %r", e) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) + if searchLimitReached: + raise SearchLimitReached( + "LegendasDivx.pt :: You've reached maximum number of search for the day.") + bsoup = ParserBeautifulSoup(res.content, ['html.parser']) # search for more than 10 results (legendasdivx uses pagination)