mirror of
https://github.com/morpheus65535/bazarr
synced 2025-01-03 05:25:28 +00:00
Subf2m provider: add support for IMDB IDs
This commit is contained in:
parent
e3c4def89a
commit
ee1506ed54
2 changed files with 115 additions and 48 deletions
|
@ -7,12 +7,10 @@ import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from guessit import guessit
|
|
||||||
|
|
||||||
from requests import Session
|
|
||||||
from bs4 import BeautifulSoup as bso
|
from bs4 import BeautifulSoup as bso
|
||||||
from guessit import guessit
|
from guessit import guessit
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
from subliminal.exceptions import ConfigurationError
|
||||||
from subliminal_patch.core import Episode
|
from subliminal_patch.core import Episode
|
||||||
from subliminal_patch.core import Movie
|
from subliminal_patch.core import Movie
|
||||||
from subliminal_patch.exceptions import APIThrottled
|
from subliminal_patch.exceptions import APIThrottled
|
||||||
|
@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle):
|
||||||
self.episode_title = None
|
self.episode_title = None
|
||||||
|
|
||||||
self._matches = set(
|
self._matches = set(
|
||||||
("title", "year")
|
("title", "year", "imdb_id")
|
||||||
if episode_number is None
|
if episode_number is None
|
||||||
else ("title", "series", "year", "season", "episode")
|
else ("title", "series", "year", "season", "episode", "imdb_id")
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_matches(self, video):
|
def get_matches(self, video):
|
||||||
|
@ -153,10 +151,11 @@ class Subf2mProvider(Provider):
|
||||||
video_types = (Episode, Movie)
|
video_types = (Episode, Movie)
|
||||||
subtitle_class = Subf2mSubtitle
|
subtitle_class = Subf2mSubtitle
|
||||||
|
|
||||||
def __init__(self, verify_ssl=True, user_agent=None, session_factory=None):
|
def __init__(self, user_agent, verify_ssl=True, session_factory=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
if not user_agent:
|
|
||||||
raise ValueError("User-agent config missing")
|
if not (user_agent or "").strip():
|
||||||
|
raise ConfigurationError("User-agent config missing")
|
||||||
|
|
||||||
self._user_agent = user_agent
|
self._user_agent = user_agent
|
||||||
self._verify_ssl = verify_ssl
|
self._verify_ssl = verify_ssl
|
||||||
|
@ -214,18 +213,17 @@ class Subf2mProvider(Provider):
|
||||||
for title in soup.select("li div[class='title'] a"):
|
for title in soup.select("li div[class='title'] a"):
|
||||||
yield title
|
yield title
|
||||||
|
|
||||||
def _search_movie(self, title, year):
|
def _search_movie(self, title, year, return_len=3):
|
||||||
title = title.lower()
|
title = title.lower()
|
||||||
year = str(year)
|
year = str(year)
|
||||||
|
|
||||||
found_movie = None
|
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for result in self._gen_results(title):
|
for result in self._gen_results(title):
|
||||||
text = result.text.lower()
|
text = result.text.lower()
|
||||||
match = self._movie_title_regex.match(text)
|
match = self._movie_title_regex.match(text)
|
||||||
if not match:
|
if not match:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
match_title = match.group(1)
|
match_title = match.group(1)
|
||||||
match_year = match.group(3)
|
match_year = match.group(3)
|
||||||
if year == match_year:
|
if year == match_year:
|
||||||
|
@ -238,19 +236,21 @@ class Subf2mProvider(Provider):
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
results.sort(key=lambda x: x["similarity"], reverse=True)
|
results.sort(key=lambda x: x["similarity"], reverse=True)
|
||||||
found_movie = results[0]["href"]
|
results = [result["href"] for result in results]
|
||||||
logger.debug("Movie found: %s", results[0])
|
if results:
|
||||||
return found_movie
|
results = set(results[:return_len])
|
||||||
|
logger.debug("Results: %s", results)
|
||||||
|
return results
|
||||||
|
|
||||||
def _search_tv_show_season(self, title, season, year=None):
|
return []
|
||||||
|
|
||||||
|
def _search_tv_show_season(self, title, season, year=None, return_len=3):
|
||||||
try:
|
try:
|
||||||
season_str = _SEASONS[season - 1].lower()
|
season_str = _SEASONS[season - 1].lower()
|
||||||
except IndexError:
|
except IndexError:
|
||||||
logger.debug("Season number not supported: %s", season)
|
logger.debug("Season number not supported: %s", season)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
found_tv_show_season = None
|
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for result in self._gen_results(title):
|
for result in self._gen_results(title):
|
||||||
text = result.text.lower()
|
text = result.text.lower()
|
||||||
|
@ -278,13 +278,20 @@ class Subf2mProvider(Provider):
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
results.sort(key=lambda x: x["similarity"], reverse=True)
|
results.sort(key=lambda x: x["similarity"], reverse=True)
|
||||||
found_tv_show_season = results[0]["href"]
|
results = [result["href"] for result in results]
|
||||||
logger.debug("TV Show season found: %s", results[0])
|
if results:
|
||||||
|
results = set(results[:return_len])
|
||||||
|
logger.debug("Results: %s", results)
|
||||||
|
return results
|
||||||
|
|
||||||
return found_tv_show_season
|
return []
|
||||||
|
|
||||||
def _find_movie_subtitles(self, path, language):
|
def _find_movie_subtitles(self, path, language, imdb_id):
|
||||||
soup = self._get_subtitle_page_soup(path, language)
|
soup = self._get_subtitle_page_soup(path, language)
|
||||||
|
imdb_matched = _match_imdb(soup, imdb_id)
|
||||||
|
if not imdb_matched:
|
||||||
|
return []
|
||||||
|
|
||||||
subtitles = []
|
subtitles = []
|
||||||
|
|
||||||
for item in soup.select("li.item"):
|
for item in soup.select("li.item"):
|
||||||
|
@ -298,9 +305,12 @@ class Subf2mProvider(Provider):
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _find_episode_subtitles(
|
def _find_episode_subtitles(
|
||||||
self, path, season, episode, language, episode_title=None
|
self, path, season, episode, language, episode_title=None, imdb_id=None
|
||||||
):
|
):
|
||||||
soup = self._get_subtitle_page_soup(path, language)
|
soup = self._get_subtitle_page_soup(path, language)
|
||||||
|
imdb_matched = _match_imdb(soup, imdb_id)
|
||||||
|
if not imdb_matched:
|
||||||
|
return []
|
||||||
|
|
||||||
subtitles = []
|
subtitles = []
|
||||||
|
|
||||||
|
@ -359,27 +369,45 @@ class Subf2mProvider(Provider):
|
||||||
is_episode = isinstance(video, Episode)
|
is_episode = isinstance(video, Episode)
|
||||||
|
|
||||||
if is_episode:
|
if is_episode:
|
||||||
result = self._search_tv_show_season(video.series, video.season, video.year)
|
paths = self._search_tv_show_season(video.series, video.season, video.year)
|
||||||
else:
|
else:
|
||||||
result = self._search_movie(video.title, video.year)
|
paths = self._search_movie(video.title, video.year)
|
||||||
|
|
||||||
if result is None:
|
if not paths:
|
||||||
logger.debug("No results")
|
logger.debug("No results")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
subtitles = []
|
subs = []
|
||||||
|
for path in paths:
|
||||||
|
must_break = False
|
||||||
|
|
||||||
for language in languages:
|
logger.debug("Looking for subs from %s", path)
|
||||||
if is_episode:
|
|
||||||
subtitles.extend(
|
for language in languages:
|
||||||
self._find_episode_subtitles(
|
if is_episode:
|
||||||
result, video.season, video.episode, language, video.title
|
subs.extend(
|
||||||
|
self._find_episode_subtitles(
|
||||||
|
path,
|
||||||
|
video.season,
|
||||||
|
video.episode,
|
||||||
|
language,
|
||||||
|
video.title,
|
||||||
|
video.series_imdb_id,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
else:
|
|
||||||
subtitles.extend(self._find_movie_subtitles(result, language))
|
|
||||||
|
|
||||||
return subtitles
|
else:
|
||||||
|
subs.extend(
|
||||||
|
self._find_movie_subtitles(path, language, video.imdb_id)
|
||||||
|
)
|
||||||
|
|
||||||
|
must_break = subs != []
|
||||||
|
|
||||||
|
if must_break:
|
||||||
|
logger.debug("Good path found: %s. Not running over others.", path)
|
||||||
|
break
|
||||||
|
|
||||||
|
return subs
|
||||||
|
|
||||||
def download_subtitle(self, subtitle):
|
def download_subtitle(self, subtitle):
|
||||||
# TODO: add MustGetBlacklisted support
|
# TODO: add MustGetBlacklisted support
|
||||||
|
@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _match_imdb(soup, imdb_id):
    """Tell whether a subtitle page belongs to the title identified by *imdb_id*.

    The page header is expected to carry a link whose last path segment is
    the IMDB ID of the title. The check is deliberately lenient: it only
    rejects a page when both sides are known and they differ.

    :param soup: parsed subtitle page (any object exposing ``select_one``).
    :param imdb_id: IMDB ID of the wanted video (e.g. ``"tt1160419"``), or
        ``None``/empty when the video has no known IMDB ID.
    :return: ``False`` only when an IMDB ID was parsed from the page and it
        differs from *imdb_id*; ``True`` otherwise.
    """
    # Without a reference ID there is nothing to verify. Rejecting the page
    # here would discard every subtitle for videos lacking IMDB metadata.
    if not imdb_id:
        logger.debug("No IMDB ID to compare against. Matching subtitles.")
        return True

    link = soup.select_one(
        "#content > div.subtitles.byFilm > div.box.clearfix "
        "> div.top.left > div.header > h2 > a"
    )
    href = link.get("href") if link is not None else None

    parsed_imdb_id = None
    if isinstance(href, str):
        # Drop a trailing slash first so ".../title/tt123/" still yields
        # "tt123" instead of an empty last segment.
        parsed_imdb_id = href.rstrip("/").split("/")[-1].strip() or None

    if parsed_imdb_id is None:
        logger.debug("Couldn't get IMDB ID")
        # The page could not be verified; give it the benefit of the doubt.
        logger.debug("Matching subtitles as IMDB ID was not parsed.")
        return True

    if parsed_imdb_id != imdb_id:
        logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id)
        return False

    logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id)
    return True
|
||||||
|
|
||||||
|
|
||||||
def _get_episode_from_release(release: str):
|
def _get_episode_from_release(release: str):
|
||||||
match = _EPISODE_SPECIAL_RE.search(release)
|
match = _EPISODE_SPECIAL_RE.search(release)
|
||||||
if match is None:
|
if match is None:
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
import pytest
|
import pytest
|
||||||
from subliminal_patch.providers import subf2m
|
from subliminal_patch.providers import subf2m
|
||||||
|
from subliminal_patch.providers.subf2m import ConfigurationError
|
||||||
from subliminal_patch.providers.subf2m import Subf2mProvider
|
from subliminal_patch.providers.subf2m import Subf2mProvider
|
||||||
from subliminal_patch.providers.subf2m import Subf2mSubtitle
|
from subliminal_patch.providers.subf2m import Subf2mSubtitle
|
||||||
from subzero.language import Language
|
from subzero.language import Language
|
||||||
|
|
||||||
|
|
||||||
_U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36"
|
_U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36"
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,13 +26,15 @@ def provider():
|
||||||
("Cure", 1997, "/subtitles/cure-kyua"),
|
("Cure", 1997, "/subtitles/cure-kyua"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_search_movie(provider, movies, title, year, expected_url):
|
def test_search_movie(provider, title, year, expected_url):
|
||||||
movie = list(movies.values())[0]
|
result = provider._search_movie(title, year)
|
||||||
movie.title = title
|
assert expected_url in result
|
||||||
movie.year = year
|
|
||||||
|
|
||||||
result = provider._search_movie(movie.title, movie.year)
|
|
||||||
assert result == expected_url
|
def test_init_empty_user_agent_raises_configurationerror():
|
||||||
|
with pytest.raises(ConfigurationError):
|
||||||
|
with Subf2mProvider(user_agent=" ") as provider:
|
||||||
|
assert provider
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
@ -52,27 +54,37 @@ def test_search_movie(provider, movies, title, year, expected_url):
|
||||||
)
|
)
|
||||||
def test_search_tv_show_season(provider, series_title, season, year, expected_url):
|
def test_search_tv_show_season(provider, series_title, season, year, expected_url):
|
||||||
result = provider._search_tv_show_season(series_title, season, year)
|
result = provider._search_tv_show_season(series_title, season, year)
|
||||||
assert result == expected_url
|
assert expected_url in result
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
|
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
|
||||||
def test_find_movie_subtitles(provider, language):
|
def test_find_movie_subtitles(provider, language, movies):
|
||||||
path = "/subtitles/dune-2021"
|
path = "/subtitles/dune-2021"
|
||||||
for sub in provider._find_movie_subtitles(path, language):
|
for sub in provider._find_movie_subtitles(path, language, movies["dune"].imdb_id):
|
||||||
assert sub.language == language
|
assert sub.language == language
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
|
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
|
||||||
def test_find_episode_subtitles(provider, language):
|
def test_find_episode_subtitles(provider, language, episodes):
|
||||||
path = "/subtitles/breaking-bad-first-season"
|
path = "/subtitles/breaking-bad-first-season"
|
||||||
for sub in provider._find_episode_subtitles(path, 1, 1, language):
|
subs = provider._find_episode_subtitles(
|
||||||
|
path, 1, 1, language, imdb_id=episodes["breaking_bad_s01e01"].series_imdb_id
|
||||||
|
)
|
||||||
|
assert subs
|
||||||
|
|
||||||
|
for sub in subs:
|
||||||
assert sub.language == language
|
assert sub.language == language
|
||||||
|
|
||||||
|
|
||||||
def test_find_episode_subtitles_from_complete_series_path(provider):
|
def test_find_episode_subtitles_from_complete_series_path(provider):
|
||||||
path = "/subtitles/courage-the-cowardly-dog"
|
path = "/subtitles/courage-the-cowardly-dog"
|
||||||
|
|
||||||
for sub in provider._find_episode_subtitles(path, 1, 1, Language.fromalpha2("en")):
|
subs = provider._find_episode_subtitles(
|
||||||
|
path, 1, 1, Language.fromalpha2("en"), imdb_id="tt0220880"
|
||||||
|
)
|
||||||
|
assert subs
|
||||||
|
|
||||||
|
for sub in subs:
|
||||||
assert sub.language == Language.fromalpha2("en")
|
assert sub.language == Language.fromalpha2("en")
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +94,7 @@ def test_list_and_download_subtitles_complete_series_pack(provider, episodes):
|
||||||
episode.series = "Sam & Max: Freelance Police"
|
episode.series = "Sam & Max: Freelance Police"
|
||||||
episode.name = "The Glazed McGuffin Affair"
|
episode.name = "The Glazed McGuffin Affair"
|
||||||
episode.title = "The Glazed McGuffin Affair"
|
episode.title = "The Glazed McGuffin Affair"
|
||||||
|
episode.series_imdb_id = "tt0125646"
|
||||||
episode.season = 1
|
episode.season = 1
|
||||||
episode.episode = 21
|
episode.episode = 21
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue