diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py index 66e29070c..ebdd7ae2b 100644 --- a/libs/subliminal_patch/providers/addic7ed.py +++ b/libs/subliminal_patch/providers/addic7ed.py @@ -8,6 +8,7 @@ import time from random import randint from urllib.parse import quote_plus +import babelfish from dogpile.cache.api import NO_VALUE from requests import Session from subliminal.cache import region @@ -39,7 +40,8 @@ class Addic7edSubtitle(_Addic7edSubtitle): download_link, uploader=None): super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link, series, season, episode, title, year, version, download_link) - self.release_info = version.replace('+', ',') if version else None + # Guessit will fail if the input is None + self.release_info = version.replace('+', ',') if version else "" self.uploader = uploader def get_matches(self, video): @@ -300,9 +302,9 @@ class Addic7edProvider(_Addic7edProvider): # LXML parser seems to fail when parsing Addic7ed.com HTML markup. # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails) # Assuming the site's markup is bad, and stripping it down to only contain what's needed. 
- show_cells = re.findall(show_cells_re, r.content) + show_cells = [cell.decode("utf-8", "ignore") for cell in re.findall(show_cells_re, r.content)] if show_cells: - soup = ParserBeautifulSoup(''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser']) + soup = ParserBeautifulSoup(''.join(show_cells), ['lxml', 'html.parser']) else: # If RegEx fails, fall back to original r.content and use 'html.parser' soup = ParserBeautifulSoup(r.content, ['html.parser']) @@ -435,7 +437,12 @@ class Addic7edProvider(_Addic7edProvider): continue # read the item - language = Language.fromaddic7ed(cells[3].text) + try: + language = Language.fromaddic7ed(cells[3].text) + except babelfish.exceptions.LanguageReverseError as error: + logger.debug("Language error: %s, Ignoring subtitle", error) + continue + hearing_impaired = bool(cells[6].text) page_link = self.server_url + cells[2].a['href'][1:] season = int(cells[0].text) @@ -461,7 +468,7 @@ class Addic7edProvider(_Addic7edProvider): def query_movie(self, movie_id, title, year=None): # get the page of the movie - logger.info('Getting the page of movie id %d', movie_id) + logger.info('Getting the page of movie id %s', movie_id) r = self.session.get(self.server_url + 'movie/' + movie_id, timeout=10, headers={ @@ -505,11 +512,24 @@ class Addic7edProvider(_Addic7edProvider): continue # read the item - language = Language.fromaddic7ed(row2.contents[4].text.strip('\n')) + try: + language = Language.fromaddic7ed(row2.contents[4].text.strip('\n')) + except babelfish.exceptions.LanguageReverseError as error: + logger.debug("Language error: %s, Ignoring subtitle", error) + continue + hearing_impaired = bool(row3.contents[1].contents[1].attrs['src'].endswith('hi.jpg')) page_link = self.server_url + 'movie/' + movie_id - version_matches = re.search(r'Version\s(.+),.+', str(row1.contents[1].contents[1])) - version = version_matches.group(1) if version_matches else None + + # Seems like Addic7ed returns the first word in the language of 
the user (Version, Versión, etc) + # As we can't match a regex, we will just strip the first word + try: + version = " ".join(str(row1.contents[1].contents[1]).split()[1:]) + version_matches = re.search(r"(.+),.+", version) + version = version_matches.group(1) if version_matches else None + except IndexError: + version = None + try: download_link = row2.contents[8].contents[3].attrs['href'][1:] except IndexError: diff --git a/libs/subliminal_patch/providers/embeddedsubtitles.py b/libs/subliminal_patch/providers/embeddedsubtitles.py index aa8927fd6..d634eb735 100644 --- a/libs/subliminal_patch/providers/embeddedsubtitles.py +++ b/libs/subliminal_patch/providers/embeddedsubtitles.py @@ -142,7 +142,11 @@ class EmbeddedSubtitlesProvider(Provider): return subtitles def list_subtitles(self, video, languages): - return self.query(video.name, languages) + if not os.path.isfile(video.original_path): + logger.debug("Ignoring inexistent file: %s", video.original_path) + return [] + + return self.query(video.original_path, languages) def download_subtitle(self, subtitle): path = self._get_subtitle_path(subtitle) diff --git a/libs/subliminal_patch/video.py b/libs/subliminal_patch/video.py index 00260c058..b58c452be 100644 --- a/libs/subliminal_patch/video.py +++ b/libs/subliminal_patch/video.py @@ -28,3 +28,4 @@ class Video(Video_): self.external_subtitle_languages = set() self.streaming_service = streaming_service self.edition = edition + self.original_path = name diff --git a/libs/version.txt b/libs/version.txt index 5e419434b..ce79c009b 100644 --- a/libs/version.txt +++ b/libs/version.txt @@ -12,6 +12,7 @@ deep-translator=1.5.4 dogpile.cache=0.6.5 engineio=4.3.0 enzyme=0.4.1 +fese=0.1.0 ffsubsync=0.4.11 Flask=1.1.1 flask-restful=0.3.8 diff --git a/tests/subliminal_patch/test_addic7ed.py b/tests/subliminal_patch/test_addic7ed.py new file mode 100644 index 000000000..f0f47e447 --- /dev/null +++ b/tests/subliminal_patch/test_addic7ed.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 
+ +import os +import pytest +import datetime +import tempfile + +import subliminal +from subliminal_patch.providers.addic7ed import Addic7edProvider +from subliminal_patch.providers.addic7ed import Addic7edSubtitle +from dogpile.cache.region import register_backend as register_cache_backend +from subzero.language import Language + + +_ENV_VARS = ( + "ANTICAPTCHA_CLASS", + "ANTICAPTCHA_ACCOUNT_KEY", + "ADDIC7ED_USERNAME", + "ADDIC7ED_PASSWORD", +) + + +def _can_run(): + for env_var in _ENV_VARS: + if not os.environ.get(env_var): + return True + + return False + + +pytestmark = pytest.mark.skipif( + _can_run(), reason=f"Some environment variables not set: {_ENV_VARS}" +) + + +@pytest.fixture(scope="session") +def region(): + register_cache_backend( + "subzero.cache.file", "subzero.cache_backends.file", "SZFileBackend" + ) + subliminal.region.configure( + "subzero.cache.file", + expiration_time=datetime.timedelta(days=30), + arguments={"appname": "sz_cache", "app_cache_dir": tempfile.gettempdir()}, + ) + subliminal.region.backend.sync() + + +def test_list_subtitles_episode(region, episodes): + item = episodes["breaking_bad_s01e01"] + language = Language("eng") + with Addic7edProvider( + os.environ["ADDIC7ED_USERNAME"], os.environ["ADDIC7ED_PASSWORD"] + ) as provider: + subtitles = provider.list_subtitles(item, {language}) + assert len(subtitles) == 6 + + subliminal.region.backend.sync() + + +def test_list_subtitles_movie(region, movies): + item = movies["dune"] + language = Language("eng") + with Addic7edProvider( + os.environ["ADDIC7ED_USERNAME"], os.environ["ADDIC7ED_PASSWORD"] + ) as provider: + subtitles = provider.list_subtitles(item, {language}) + assert len(subtitles) == 2 + + subliminal.region.backend.sync()