Merge pull request #586 from ngosang/feature/subdivx5

Improve Subdivx provider, handle more exceptions
2019-10-05 10:00:13 -04:00 · 2019-10-05 10:00:13 -04:00 · 4e5e7d6744
parent 9ca305740b 69f23c65a8
commit 4e5e7d6744
4 changed files with 92 additions and 78 deletions
--- a/bazarr/get_providers.py
+++ b/bazarr/get_providers.py
@@ -8,10 +8,11 @@ import time
 from get_args import args
 from config import settings
-from subliminal_patch.exceptions import TooManyRequests, APIThrottled
+from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError
 from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
-VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled)
+VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled,
+                             ParseResponseError)
 VALID_COUNT_EXCEPTIONS = ('TooManyRequests', 'ServiceUnavailable', 'APIThrottled')
 PROVIDER_THROTTLE_MAP = {
@@ -20,6 +21,7 @@ PROVIDER_THROTTLE_MAP = {
        DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
        ServiceUnavailable: (datetime.timedelta(minutes=20), "20 minutes"),
        APIThrottled: (datetime.timedelta(minutes=10), "10 minutes"),
+        ParseResponseError: (datetime.timedelta(hours=6), "6 hours"),
    },
    "opensubtitles": {
        TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -28,7 +28,7 @@ from subliminal.utils import hash_napiprojekt, hash_opensubtitles, hash_shooter,
 from subliminal.video import VIDEO_EXTENSIONS, Video, Episode, Movie
 from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \
    ThreadPoolExecutor, check_video
-from subliminal_patch.exceptions import TooManyRequests, APIThrottled
+from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError
 from subzero.language import Language
 from scandir import scandir, scandir_generic as _scandir_generic
@@ -280,7 +280,7 @@ class SZProviderPool(ProviderPool):
                logger.debug("RAR Traceback: %s", traceback.format_exc())
                return False
-            except (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled), e:
+            except (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled, ParseResponseError) as e:
                self.throttle_callback(subtitle.provider_name, e)
                self.discarded_providers.add(subtitle.provider_name)
                return False
--- a/libs/subliminal_patch/exceptions.py
+++ b/libs/subliminal_patch/exceptions.py
@@ -9,3 +9,8 @@ class TooManyRequests(ProviderError):
 class APIThrottled(ProviderError):
    pass
+class ParseResponseError(ProviderError):
+    """Exception raised by providers when they are not able to parse the response."""
+    pass
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -7,13 +7,14 @@ import zipfile
 import rarfile
 from subzero.language import Language
 from guessit import guessit
 from requests import Session
 from subliminal import __short_version__
 from subliminal.exceptions import ServiceUnavailable
 from subliminal.providers import ParserBeautifulSoup, Provider
 from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending,guess_matches
 from subliminal.video import Episode, Movie
 from subliminal_patch.exceptions import ParseResponseError
 logger = logging.getLogger(__name__)
@@ -119,35 +120,17 @@ class SubdivxSubtitlesProvider(Provider):
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
-            r = self.session.get(search_link, params=params, timeout=10)
+            response = self.session.get(search_link, params=params, timeout=10)
-            r.raise_for_status()
+            self._check_response(response)
-            if not r.content:
+            try:
-                logger.debug('No data returned from provider')
+                page_subtitles = self._parse_subtitles_page(response, language)
-                return []
+            except Exception as e:
                raise ParseResponseError('Error parsing subtitles list: ' + str(e))
-            page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
+            subtitles += page_subtitles
            title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
            body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
            if len(title_soups) != len(body_soups):
                logger.debug('Error in provider data')
                return []
            for subtitle in range(0, len(title_soups)):
                title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
-                # title
+            if len(page_subtitles) >= 20:
                title = title_soup.find("a").text.replace("Subtitulo de ", "")
                page_link = title_soup.find("a")["href"].replace('http://', 'https://')
                # body
                description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
                subtitle = self.subtitle_class(language, page_link, description, title)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
            if len(title_soups) >= 20:
                params['pg'] += 1  # search next page
                time.sleep(self.multi_result_throttle)
            else:
@@ -175,58 +158,82 @@ class SubdivxSubtitlesProvider(Provider):
        return subtitles
    def get_download_link(self, subtitle):
        r = self.session.get(subtitle.page_link, timeout=10)
        r.raise_for_status()
        if r.content:
            page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
        logger.debug('No data returned from provider')
        return None
    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            # get download link
-            download_link = self.get_download_link(subtitle)
+            download_link = self._get_download_link(subtitle)
            r = self.session.get(download_link, headers={'Referer': subtitle.page_link}, timeout=30)
            r.raise_for_status()
-            if not r.content:
+            # download zip / rar file with the subtitle
-                logger.debug('Unable to download subtitle. No data returned from provider')
+            response = self.session.get(download_link, headers={'Referer': subtitle.page_link}, timeout=30)
-                return
+            self._check_response(response)
-            archive = _get_archive(r.content)
+            # open the compressed archive
            archive = self._get_archive(response.content)
-            subtitle_content = _get_subtitle_from_archive(archive)
+            # extract the subtitle
-            if subtitle_content:
+            subtitle_content = self._get_subtitle_from_archive(archive)
            subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)
    def _check_response(self, response):
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' + str(response.status_code))
-def _get_archive(content):
+    def _parse_subtitles_page(self, response, language):
        subtitles = []
        page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
            # title
            title = title_soup.find("a").text.replace("Subtitulo de ", "")
            page_link = title_soup.find("a")["href"].replace('http://', 'https://')
            # body
            description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
            subtitle = self.subtitle_class(language, page_link, description, title)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)
        return subtitles
    def _get_download_link(self, subtitle):
        response = self.session.get(subtitle.page_link, timeout=10)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
        except Exception as e:
            raise ParseResponseError('Error parsing download link: ' + str(e))
        raise ParseResponseError('Download link not found')
    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
    archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise ParseResponseError('Unsupported compressed format')
        return archive
-
+    def _get_subtitle_from_archive(self, archive):
 def _get_subtitle_from_archive(archive):
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
@@ -238,4 +245,4 @@ def _get_subtitle_from_archive(archive):
            return archive.read(name)
-    return None
+        raise ParseResponseError('Can not find the subtitle in the compressed file')