1
0
Fork 0
mirror of https://github.com/morpheus65535/bazarr synced 2025-01-03 05:25:28 +00:00

Subf2m provider: add support for IMDB IDs

This commit is contained in:
Vitiko 2023-06-13 02:13:42 -04:00
parent e3c4def89a
commit ee1506ed54
2 changed files with 115 additions and 48 deletions

View file

@ -7,12 +7,10 @@ import re
import time import time
import urllib.parse import urllib.parse
from guessit import guessit
from requests import Session
from bs4 import BeautifulSoup as bso from bs4 import BeautifulSoup as bso
from guessit import guessit from guessit import guessit
from requests import Session from requests import Session
from subliminal.exceptions import ConfigurationError
from subliminal_patch.core import Episode from subliminal_patch.core import Episode
from subliminal_patch.core import Movie from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled from subliminal_patch.exceptions import APIThrottled
@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle):
self.episode_title = None self.episode_title = None
self._matches = set( self._matches = set(
("title", "year") ("title", "year", "imdb_id")
if episode_number is None if episode_number is None
else ("title", "series", "year", "season", "episode") else ("title", "series", "year", "season", "episode", "imdb_id")
) )
def get_matches(self, video): def get_matches(self, video):
@ -153,10 +151,11 @@ class Subf2mProvider(Provider):
video_types = (Episode, Movie) video_types = (Episode, Movie)
subtitle_class = Subf2mSubtitle subtitle_class = Subf2mSubtitle
def __init__(self, verify_ssl=True, user_agent=None, session_factory=None): def __init__(self, user_agent, verify_ssl=True, session_factory=None):
super().__init__() super().__init__()
if not user_agent:
raise ValueError("User-agent config missing") if not (user_agent or "").strip():
raise ConfigurationError("User-agent config missing")
self._user_agent = user_agent self._user_agent = user_agent
self._verify_ssl = verify_ssl self._verify_ssl = verify_ssl
@ -214,18 +213,17 @@ class Subf2mProvider(Provider):
for title in soup.select("li div[class='title'] a"): for title in soup.select("li div[class='title'] a"):
yield title yield title
def _search_movie(self, title, year): def _search_movie(self, title, year, return_len=3):
title = title.lower() title = title.lower()
year = str(year) year = str(year)
found_movie = None
results = [] results = []
for result in self._gen_results(title): for result in self._gen_results(title):
text = result.text.lower() text = result.text.lower()
match = self._movie_title_regex.match(text) match = self._movie_title_regex.match(text)
if not match: if not match:
continue continue
match_title = match.group(1) match_title = match.group(1)
match_year = match.group(3) match_year = match.group(3)
if year == match_year: if year == match_year:
@ -238,19 +236,21 @@ class Subf2mProvider(Provider):
if results: if results:
results.sort(key=lambda x: x["similarity"], reverse=True) results.sort(key=lambda x: x["similarity"], reverse=True)
found_movie = results[0]["href"] results = [result["href"] for result in results]
logger.debug("Movie found: %s", results[0]) if results:
return found_movie results = set(results[:return_len])
logger.debug("Results: %s", results)
return results
def _search_tv_show_season(self, title, season, year=None): return []
def _search_tv_show_season(self, title, season, year=None, return_len=3):
try: try:
season_str = _SEASONS[season - 1].lower() season_str = _SEASONS[season - 1].lower()
except IndexError: except IndexError:
logger.debug("Season number not supported: %s", season) logger.debug("Season number not supported: %s", season)
return None return None
found_tv_show_season = None
results = [] results = []
for result in self._gen_results(title): for result in self._gen_results(title):
text = result.text.lower() text = result.text.lower()
@ -278,13 +278,20 @@ class Subf2mProvider(Provider):
if results: if results:
results.sort(key=lambda x: x["similarity"], reverse=True) results.sort(key=lambda x: x["similarity"], reverse=True)
found_tv_show_season = results[0]["href"] results = [result["href"] for result in results]
logger.debug("TV Show season found: %s", results[0]) if results:
results = set(results[:return_len])
logger.debug("Results: %s", results)
return results
return found_tv_show_season return []
def _find_movie_subtitles(self, path, language): def _find_movie_subtitles(self, path, language, imdb_id):
soup = self._get_subtitle_page_soup(path, language) soup = self._get_subtitle_page_soup(path, language)
imdb_matched = _match_imdb(soup, imdb_id)
if not imdb_matched:
return []
subtitles = [] subtitles = []
for item in soup.select("li.item"): for item in soup.select("li.item"):
@ -298,9 +305,12 @@ class Subf2mProvider(Provider):
return subtitles return subtitles
def _find_episode_subtitles( def _find_episode_subtitles(
self, path, season, episode, language, episode_title=None self, path, season, episode, language, episode_title=None, imdb_id=None
): ):
soup = self._get_subtitle_page_soup(path, language) soup = self._get_subtitle_page_soup(path, language)
imdb_matched = _match_imdb(soup, imdb_id)
if not imdb_matched:
return []
subtitles = [] subtitles = []
@ -359,27 +369,45 @@ class Subf2mProvider(Provider):
is_episode = isinstance(video, Episode) is_episode = isinstance(video, Episode)
if is_episode: if is_episode:
result = self._search_tv_show_season(video.series, video.season, video.year) paths = self._search_tv_show_season(video.series, video.season, video.year)
else: else:
result = self._search_movie(video.title, video.year) paths = self._search_movie(video.title, video.year)
if result is None: if not paths:
logger.debug("No results") logger.debug("No results")
return [] return []
subtitles = [] subs = []
for path in paths:
must_break = False
for language in languages: logger.debug("Looking for subs from %s", path)
if is_episode:
subtitles.extend( for language in languages:
self._find_episode_subtitles( if is_episode:
result, video.season, video.episode, language, video.title subs.extend(
self._find_episode_subtitles(
path,
video.season,
video.episode,
language,
video.title,
video.series_imdb_id,
)
) )
)
else:
subtitles.extend(self._find_movie_subtitles(result, language))
return subtitles else:
subs.extend(
self._find_movie_subtitles(path, language, video.imdb_id)
)
must_break = subs != []
if must_break:
logger.debug("Good path found: %s. Not running over others.", path)
break
return subs
def download_subtitle(self, subtitle): def download_subtitle(self, subtitle):
# TODO: add MustGetBlacklisted support # TODO: add MustGetBlacklisted support
@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile(
) )
def _match_imdb(soup, imdb_id):
    """Tell whether a subtitle page may be used for the wanted IMDB title.

    The IMDB ID is read from the last path segment of the link found in the
    page header. The check is deliberately lenient: a page is rejected only
    when BOTH IDs are known and they differ. If either side is unknown there
    is nothing to compare, so the page is accepted.

    :param soup: parsed subtitle page; only ``select_one`` is used.
    :param imdb_id: IMDB ID of the wanted video, or ``None``/empty if unknown.
    :return: ``False`` when both IDs are known and differ, ``True`` otherwise.
    """
    try:
        href = soup.select_one(
            "#content > div.subtitles.byFilm > div.box.clearfix > div.top.left > div.header > h2 > a"
        ).get("href")  # type: ignore
        # Drop surrounding whitespace and any trailing slash first, otherwise
        # ".../title/tt123/" would yield an empty last segment.
        parsed_imdb_id = href.strip().rstrip("/").split("/")[-1].strip()  # type: ignore
    except AttributeError:
        # Header link missing (select_one returned None) or it has no href.
        logger.debug("Couldn't get IMDB ID")
        parsed_imdb_id = None

    # An empty segment carries no information; treat it like a failed parse.
    if not parsed_imdb_id:
        logger.debug("Matching subtitles as IMDB ID was not parsed.")
        return True

    # Without a reference ID from the video there is nothing to verify
    # against; rejecting here would discard every result for such videos.
    if not imdb_id:
        logger.debug("Matching subtitles as the video has no IMDB ID to compare.")
        return True

    if parsed_imdb_id != imdb_id:
        logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id)
        return False

    logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id)
    return True
def _get_episode_from_release(release: str): def _get_episode_from_release(release: str):
match = _EPISODE_SPECIAL_RE.search(release) match = _EPISODE_SPECIAL_RE.search(release)
if match is None: if match is None:

View file

@ -1,10 +1,10 @@
import pytest import pytest
from subliminal_patch.providers import subf2m from subliminal_patch.providers import subf2m
from subliminal_patch.providers.subf2m import ConfigurationError
from subliminal_patch.providers.subf2m import Subf2mProvider from subliminal_patch.providers.subf2m import Subf2mProvider
from subliminal_patch.providers.subf2m import Subf2mSubtitle from subliminal_patch.providers.subf2m import Subf2mSubtitle
from subzero.language import Language from subzero.language import Language
_U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36" _U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36"
@ -26,13 +26,15 @@ def provider():
("Cure", 1997, "/subtitles/cure-kyua"), ("Cure", 1997, "/subtitles/cure-kyua"),
], ],
) )
def test_search_movie(provider, movies, title, year, expected_url): def test_search_movie(provider, title, year, expected_url):
movie = list(movies.values())[0] result = provider._search_movie(title, year)
movie.title = title assert expected_url in result
movie.year = year
result = provider._search_movie(movie.title, movie.year)
assert result == expected_url def test_init_empty_user_agent_raises_configurationerror():
with pytest.raises(ConfigurationError):
with Subf2mProvider(user_agent=" ") as provider:
assert provider
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -52,27 +54,37 @@ def test_search_movie(provider, movies, title, year, expected_url):
) )
def test_search_tv_show_season(provider, series_title, season, year, expected_url): def test_search_tv_show_season(provider, series_title, season, year, expected_url):
result = provider._search_tv_show_season(series_title, season, year) result = provider._search_tv_show_season(series_title, season, year)
assert result == expected_url assert expected_url in result
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")]) @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
def test_find_movie_subtitles(provider, language): def test_find_movie_subtitles(provider, language, movies):
path = "/subtitles/dune-2021" path = "/subtitles/dune-2021"
for sub in provider._find_movie_subtitles(path, language): for sub in provider._find_movie_subtitles(path, language, movies["dune"].imdb_id):
assert sub.language == language assert sub.language == language
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")]) @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
def test_find_episode_subtitles(provider, language): def test_find_episode_subtitles(provider, language, episodes):
path = "/subtitles/breaking-bad-first-season" path = "/subtitles/breaking-bad-first-season"
for sub in provider._find_episode_subtitles(path, 1, 1, language): subs = provider._find_episode_subtitles(
path, 1, 1, language, imdb_id=episodes["breaking_bad_s01e01"].series_imdb_id
)
assert subs
for sub in subs:
assert sub.language == language assert sub.language == language
def test_find_episode_subtitles_from_complete_series_path(provider): def test_find_episode_subtitles_from_complete_series_path(provider):
path = "/subtitles/courage-the-cowardly-dog" path = "/subtitles/courage-the-cowardly-dog"
for sub in provider._find_episode_subtitles(path, 1, 1, Language.fromalpha2("en")): subs = provider._find_episode_subtitles(
path, 1, 1, Language.fromalpha2("en"), imdb_id="tt0220880"
)
assert subs
for sub in subs:
assert sub.language == Language.fromalpha2("en") assert sub.language == Language.fromalpha2("en")
@ -82,6 +94,7 @@ def test_list_and_download_subtitles_complete_series_pack(provider, episodes):
episode.series = "Sam & Max: Freelance Police" episode.series = "Sam & Max: Freelance Police"
episode.name = "The Glazed McGuffin Affair" episode.name = "The Glazed McGuffin Affair"
episode.title = "The Glazed McGuffin Affair" episode.title = "The Glazed McGuffin Affair"
episode.series_imdb_id = "tt0125646"
episode.season = 1 episode.season = 1
episode.episode = 21 episode.episode = 21