diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index 4a97205f3..b5e0b19da 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -1,27 +1,22 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import io import logging -import os import re import time -import zipfile -import rarfile from subzero.language import Language from requests import Session from subliminal import __short_version__ -from subliminal.exceptions import ServiceUnavailable from subliminal.providers import ParserBeautifulSoup -from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches from subliminal.video import Episode, Movie from subliminal_patch.exceptions import APIThrottled from six.moves import range -from subliminal_patch.score import get_scores from subliminal_patch.subtitle import Subtitle from subliminal_patch.providers import Provider -from guessit import guessit +from subliminal_patch.providers.utils import get_archive_from_bytes +from subliminal_patch.providers.utils import get_subtitle_from_archive +from subliminal_patch.providers.utils import update_matches _SERVER_URL = "https://www.subdivx.com" @@ -72,15 +67,7 @@ class SubdivxSubtitle(Subtitle): # already matched in search query matches.update(["title", "year"]) - # Special string comparisons are unnecessary. Guessit can match keys - # from any string and find even more keywords. - matches |= guess_matches( - video, - guessit( - self.description, - {"type": "episode" if isinstance(video, Episode) else "movie"}, - ), - ) + update_matches(matches, video, self.description) # Don't lowercase; otherwise it will match a lot of false positives if video.release_group and video.release_group in self.description: @@ -100,7 +87,6 @@ class SubdivxSubtitlesProvider(Provider): subtitle_class = SubdivxSubtitle multi_result_throttle = 2 - language_list = list(languages) def __init__(self): self.session = None @@ -114,14 +100,27 @@ class SubdivxSubtitlesProvider(Provider): self.session.close() def query(self, video, languages): + subtitles = [] + if isinstance(video, Episode): - query = f"{video.series} S{video.season:02}E{video.episode:02}" + for query in ( + f"{video.series} S{video.season:02}E{video.episode:02}", + f"{video.series} S{video.season:02}", + ): + subtitles += self._handle_multi_page_search(query, video) + + # Fallback + if not subtitles: + subtitles += self._handle_multi_page_search(video.series, video) else: # Subdvix has problems searching foreign movies if the year is # appended. A proper solution would be filtering results with the # year in self._parse_subtitles_page. - query = video.title + subtitles += self._handle_multi_page_search(video.title, video) + return subtitles + + def _handle_multi_page_search(self, query, video, max_loops=3): params = { "buscar2": query, "accion": "5", @@ -130,23 +129,19 @@ class SubdivxSubtitlesProvider(Provider): "realiza_b": "1", "pg": "1", } + logger.debug("Query: %s", query) - logger.debug(f"Searching subtitles: {query}") - subtitles = [] - language = self.language_list[0] - search_link = f"{_SERVER_URL}/index.php" - while True: - response = self.session.get( - search_link, params=params, allow_redirects=True, timeout=20 - ) + loops = 1 + max_loops_not_met = True - try: - page_subtitles = self._parse_subtitles_page(video, response, language) - except Exception as e: - logger.error(f"Error parsing subtitles list: {e}") - break + while max_loops_not_met: + loops += 1 + max_loops_not_met = loops < max_loops - subtitles += page_subtitles + page_subtitles = self._get_page_subtitles(params, video) + + logger.debug("Yielding %d subtitles", len(page_subtitles)) + yield from page_subtitles if len(page_subtitles) < 100: break # this is the last page @@ -154,7 +149,19 @@ class SubdivxSubtitlesProvider(Provider): params["pg"] += 1 # search next page time.sleep(self.multi_result_throttle) - return subtitles + def _get_page_subtitles(self, params, video): + search_link = f"{_SERVER_URL}/index.php" + response = self.session.get( + search_link, params=params, allow_redirects=True, timeout=20 + ) + + try: + page_subtitles = self._parse_subtitles_page(video, response) + except Exception as error: + logger.error(f"Error parsing subtitles list: {error}") + return [] + + return page_subtitles def list_subtitles(self, video, languages): return self.query(video, languages) @@ -171,14 +178,19 @@ class SubdivxSubtitlesProvider(Provider): ) response.raise_for_status() - # open the compressed archive - archive = _get_archive(response.content) + # TODO: add MustGetBlacklisted support - # extract the subtitle - subtitle_content = _get_subtitle_from_archive(archive, subtitle) - subtitle.content = fix_line_ending(subtitle_content) + archive = get_archive_from_bytes(response.content) + if archive is None: + raise APIThrottled("Unknwon compressed format") - def _parse_subtitles_page(self, video, response, language): + episode = None + if isinstance(subtitle.video, Episode): + episode = subtitle.video.episode + + subtitle.content = get_subtitle_from_archive(archive, episode=episode) + + def _parse_subtitles_page(self, video, response): subtitles = [] page_soup = ParserBeautifulSoup( @@ -241,79 +253,6 @@ def _clean_title(title): return title -def _get_archive(content): - # open the archive - archive_stream = io.BytesIO(content) - if rarfile.is_rarfile(archive_stream): - logger.debug("Identified rar archive") - archive = rarfile.RarFile(archive_stream) - elif zipfile.is_zipfile(archive_stream): - logger.debug("Identified zip archive") - archive = zipfile.ZipFile(archive_stream) - else: - raise APIThrottled("Unsupported compressed format") - - return archive - - -def _get_subtitle_from_archive(archive, subtitle): - _valid_names = [] - for name in archive.namelist(): - # discard hidden files - # discard non-subtitle files - if not os.path.split(name)[-1].startswith(".") and name.lower().endswith( - SUBTITLE_EXTENSIONS - ): - _valid_names.append(name) - - # archive with only 1 subtitle - if len(_valid_names) == 1: - logger.debug( - f"returning from archive: {_valid_names[0]} (single subtitle file)" - ) - return archive.read(_valid_names[0]) - - # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file - _scores = get_scores(subtitle.video) - _max_score = 0 - _max_name = "" - for name in _valid_names: - _guess = guessit(name) - if "season" not in _guess: - _guess["season"] = -1 - if "episode" not in _guess: - _guess["episode"] = -1 - - if isinstance(subtitle.video, Episode): - logger.debug("guessing %s" % name) - logger.debug( - f"subtitle S{_guess['season']}E{_guess['episode']} video " - f"S{subtitle.video.season}E{subtitle.video.episode}" - ) - - if ( - subtitle.video.episode != _guess["episode"] - or subtitle.video.season != _guess["season"] - ): - logger.debug("subtitle does not match video, skipping") - continue - - matches = set() - matches |= guess_matches(subtitle.video, _guess) - _score = sum((_scores.get(match, 0) for match in matches)) - logger.debug("srt matches: %s, score %d" % (matches, _score)) - if _score > _max_score: - _max_score = _score - _max_name = name - logger.debug(f"new max: {name} {_score}") - - if _max_score > 0: - logger.debug(f"returning from archive: {_max_name} scored {_max_score}") - return archive.read(_max_name) - - raise APIThrottled("Can not find the subtitle in the compressed file") - - def _get_download_url(data): try: return [ diff --git a/tests/subliminal_patch/test_subdivx.py b/tests/subliminal_patch/test_subdivx.py index 16639becf..2b0a6c11f 100644 --- a/tests/subliminal_patch/test_subdivx.py +++ b/tests/subliminal_patch/test_subdivx.py @@ -1,31 +1,32 @@ # -*- coding: utf-8 -*- import pytest +import copy + from subliminal_patch.providers.subdivx import SubdivxSubtitlesProvider from subliminal_patch.providers.subdivx import SubdivxSubtitle +from subliminal_patch.core import SZProviderPool from subliminal_patch.core import Episode from subzero.language import Language -@pytest.mark.vcr def test_list_subtitles_movie(movies): item = movies["dune"] with SubdivxSubtitlesProvider() as provider: subtitles = provider.list_subtitles(item, {Language("spa", "MX")}) - assert subtitles - assert len(subtitles) == 9 + assert len(subtitles) >= 9 -@pytest.mark.vcr -def test_list_subtitles_episode(episodes): - item = episodes["breaking_bad_s01e01"] +@pytest.mark.parametrize( + "episode_key,expected", [("breaking_bad_s01e01", 15), ("inexistent", 0)] +) +def test_list_subtitles_episode(episodes, episode_key, expected): + item = episodes[episode_key] with SubdivxSubtitlesProvider() as provider: subtitles = provider.list_subtitles(item, {Language("spa", "MX")}) - assert subtitles - assert len(subtitles) == 15 + assert len(subtitles) >= expected -@pytest.mark.vcr def test_download_subtitle(movies): subtitle = SubdivxSubtitle( Language("spa", "MX"), @@ -41,6 +42,24 @@ def test_download_subtitle(movies): assert subtitle.content is not None +def test_download_subtitle_episode_pack(episodes): + video = copy.copy(episodes["breaking_bad_s01e01"]) + video.episode = 3 + + subtitle = SubdivxSubtitle( + Language("spa", "MX"), + video, + "https://www.subdivx.com/X66XMzY1NjEwX-breaking-bad-s01e0107.html", + "Breaking Bad S01E01-07", + "Son los del torrent que vienen Formato / Dimensiones 624x352 / TamaƱo 351 MB -Incluye los Torrents-", + "", + "https://www.subdivx.com/bajar.php?id=365610&u=7", + ) + with SubdivxSubtitlesProvider() as provider: + provider.download_subtitle(subtitle) + assert subtitle.content is not None + + @pytest.fixture def video(): return Episode( @@ -59,7 +78,6 @@ def video(): ) -@pytest.mark.vcr def test_subtitle_description_not_lowercase(video): with SubdivxSubtitlesProvider() as provider: subtitles = provider.list_subtitles(video, {Language("spa", "MX")})