mirror of https://github.com/morpheus65535/bazarr
Subf2m Provider: add more improvements
* Retry queries when the subf2m server returns 503
* Add support for episode titles in season packs
* Add support for season packs with "Complete Series" titles
This commit is contained in:
parent
254df4446f
commit
fe8101dcee
|
@ -1,14 +1,15 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from difflib import SequenceMatcher
|
||||||
import functools
|
import functools
|
||||||
import logging
|
import logging
|
||||||
import urllib.parse
|
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from bs4 import BeautifulSoup as bso
|
from bs4 import BeautifulSoup as bso
|
||||||
from guessit import guessit
|
from guessit import guessit
|
||||||
from requests import Session
|
from requests import Session
|
||||||
from difflib import SequenceMatcher
|
|
||||||
from subliminal_patch.core import Episode
|
from subliminal_patch.core import Episode
|
||||||
from subliminal_patch.core import Movie
|
from subliminal_patch.core import Movie
|
||||||
from subliminal_patch.exceptions import APIThrottled
|
from subliminal_patch.exceptions import APIThrottled
|
||||||
|
@ -31,6 +32,7 @@ class Subf2mSubtitle(Subtitle):
|
||||||
|
|
||||||
self.release_info = release_info
|
self.release_info = release_info
|
||||||
self.episode_number = episode_number
|
self.episode_number = episode_number
|
||||||
|
self.episode_title = None
|
||||||
|
|
||||||
self._matches = set(
|
self._matches = set(
|
||||||
("title", "year")
|
("title", "year")
|
||||||
|
@ -115,7 +117,7 @@ class Subf2mProvider(Provider):
|
||||||
provider_name = "subf2m"
|
provider_name = "subf2m"
|
||||||
|
|
||||||
_movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
|
_movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
|
||||||
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) season( \((\d{4})\))?$")
|
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) (season|series)( \((\d{4})\))?$")
|
||||||
_supported_languages = {}
|
_supported_languages = {}
|
||||||
_supported_languages["brazillian-portuguese"] = Language("por", "BR")
|
_supported_languages["brazillian-portuguese"] = Language("por", "BR")
|
||||||
|
|
||||||
|
@ -138,12 +140,34 @@ class Subf2mProvider(Provider):
|
||||||
def terminate(self):
|
def terminate(self):
|
||||||
self._session.close()
|
self._session.close()
|
||||||
|
|
||||||
|
def _safe_get_text(self, url, retry=3, default_return=""):
|
||||||
|
req = None
|
||||||
|
|
||||||
|
for n in range(retry):
|
||||||
|
req = self._session.get(url, stream=True)
|
||||||
|
# Sometimes subf2m will return a 503 code. This error usually disappears
|
||||||
|
# retrying the query
|
||||||
|
if req.status_code == 503:
|
||||||
|
logger.debug("503 returned. Trying again [%d] in 3 seconds", n + 1)
|
||||||
|
time.sleep(3)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
req.raise_for_status()
|
||||||
|
break
|
||||||
|
|
||||||
|
if req is not None:
|
||||||
|
return "\n".join(
|
||||||
|
line for line in req.iter_lines(decode_unicode=True) if line
|
||||||
|
)
|
||||||
|
|
||||||
|
return default_return
|
||||||
|
|
||||||
def _gen_results(self, query):
|
def _gen_results(self, query):
|
||||||
req = self._session.get(
|
query = urllib.parse.quote(query)
|
||||||
f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l=",
|
|
||||||
stream=True,
|
url = f"{_BASE_URL}/subtitles/searchbytitle?query={query}&l="
|
||||||
)
|
|
||||||
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
|
text = self._safe_get_text(url)
|
||||||
soup = bso(text, "html.parser")
|
soup = bso(text, "html.parser")
|
||||||
|
|
||||||
for title in soup.select("li div[class='title'] a"):
|
for title in soup.select("li div[class='title'] a"):
|
||||||
|
@ -189,12 +213,19 @@ class Subf2mProvider(Provider):
|
||||||
results = []
|
results = []
|
||||||
for result in self._gen_results(title):
|
for result in self._gen_results(title):
|
||||||
text = result.text.lower()
|
text = result.text.lower()
|
||||||
|
|
||||||
match = self._tv_show_title_regex.match(text)
|
match = self._tv_show_title_regex.match(text)
|
||||||
if not match:
|
if not match:
|
||||||
|
logger.debug("Series title not matched: %s", text)
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
|
logger.debug("Series title matched: %s", text)
|
||||||
|
|
||||||
match_title = match.group(1)
|
match_title = match.group(1)
|
||||||
match_season = match.group(2)
|
match_season = match.group(2)
|
||||||
if season_str == match_season:
|
|
||||||
|
# Match "complete series" titles as they usually contain season packs
|
||||||
|
if season_str == match_season or match_season == "complete":
|
||||||
results.append(
|
results.append(
|
||||||
{
|
{
|
||||||
"href": result.get("href"),
|
"href": result.get("href"),
|
||||||
|
@ -223,7 +254,9 @@ class Subf2mProvider(Provider):
|
||||||
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _find_episode_subtitles(self, path, season, episode, language):
|
def _find_episode_subtitles(
|
||||||
|
self, path, season, episode, language, episode_title=None
|
||||||
|
):
|
||||||
soup = self._get_subtitle_page_soup(path, language)
|
soup = self._get_subtitle_page_soup(path, language)
|
||||||
|
|
||||||
subtitles = []
|
subtitles = []
|
||||||
|
@ -258,6 +291,8 @@ class Subf2mProvider(Provider):
|
||||||
if subtitle is None:
|
if subtitle is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
subtitle.episode_title = episode_title
|
||||||
|
|
||||||
logger.debug("Found subtitle: %s", subtitle)
|
logger.debug("Found subtitle: %s", subtitle)
|
||||||
subtitles.append(subtitle)
|
subtitles.append(subtitle)
|
||||||
|
|
||||||
|
@ -266,8 +301,7 @@ class Subf2mProvider(Provider):
|
||||||
def _get_subtitle_page_soup(self, path, language):
|
def _get_subtitle_page_soup(self, path, language):
|
||||||
language_path = self._supported_languages_reversed[language]
|
language_path = self._supported_languages_reversed[language]
|
||||||
|
|
||||||
req = self._session.get(f"{_BASE_URL}{path}/{language_path}", stream=True)
|
text = self._safe_get_text(f"{_BASE_URL}{path}/{language_path}")
|
||||||
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
|
|
||||||
|
|
||||||
return bso(text, "html.parser")
|
return bso(text, "html.parser")
|
||||||
|
|
||||||
|
@ -289,7 +323,7 @@ class Subf2mProvider(Provider):
|
||||||
if is_episode:
|
if is_episode:
|
||||||
subtitles.extend(
|
subtitles.extend(
|
||||||
self._find_episode_subtitles(
|
self._find_episode_subtitles(
|
||||||
result, video.season, video.episode, language
|
result, video.season, video.episode, language, video.title
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
@ -300,14 +334,13 @@ class Subf2mProvider(Provider):
|
||||||
def download_subtitle(self, subtitle):
|
def download_subtitle(self, subtitle):
|
||||||
# TODO: add MustGetBlacklisted support
|
# TODO: add MustGetBlacklisted support
|
||||||
|
|
||||||
req = self._session.get(subtitle.page_link, stream=True)
|
text = self._safe_get_text(subtitle.page_link)
|
||||||
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
|
|
||||||
soup = bso(text, "html.parser")
|
soup = bso(text, "html.parser")
|
||||||
try:
|
try:
|
||||||
download_url = _BASE_URL + str(
|
download_url = _BASE_URL + str(
|
||||||
soup.select_one("a[id='downloadButton']")["href"] # type: ignore
|
soup.select_one("a[id='downloadButton']")["href"] # type: ignore
|
||||||
)
|
)
|
||||||
except (AttributeError, KeyError):
|
except (AttributeError, KeyError, TypeError):
|
||||||
raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")
|
raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")
|
||||||
|
|
||||||
downloaded = self._session.get(download_url, allow_redirects=True)
|
downloaded = self._session.get(download_url, allow_redirects=True)
|
||||||
|
@ -318,7 +351,9 @@ class Subf2mProvider(Provider):
|
||||||
raise APIThrottled(f"Invalid archive: {subtitle.page_link}")
|
raise APIThrottled(f"Invalid archive: {subtitle.page_link}")
|
||||||
|
|
||||||
subtitle.content = get_subtitle_from_archive(
|
subtitle.content = get_subtitle_from_archive(
|
||||||
archive, episode=subtitle.episode_number
|
archive,
|
||||||
|
episode=subtitle.episode_number,
|
||||||
|
episode_title=subtitle.episode_title,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ def test_search_movie(movies, title, year, expected_url):
|
||||||
("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"),
|
("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"),
|
||||||
("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"),
|
("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"),
|
||||||
("The Bear", 1, "/subtitles/the-bear-first-season"),
|
("The Bear", 1, "/subtitles/the-bear-first-season"),
|
||||||
|
("Courage the Cowardly Dog", 1, "/subtitles/courage-the-cowardly-dog"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_search_tv_show_season(episodes, title, season, expected_url):
|
def test_search_tv_show_season(episodes, title, season, expected_url):
|
||||||
|
@ -62,6 +63,16 @@ def test_find_episode_subtitles(language):
|
||||||
assert sub.language == language
|
assert sub.language == language
|
||||||
|
|
||||||
|
|
||||||
|
def test_fint_episode_subtitles_from_complete_series_path(episodes):
    """Check the language of subtitles found under a "complete series" path.

    Queries season 1, episode 1 in English against a path that carries no
    season suffix and asserts every returned subtitle is English.
    """
    english = Language.fromalpha2("en")
    complete_series_path = "/subtitles/courage-the-cowardly-dog"

    with Subf2mProvider() as provider:
        found = provider._find_episode_subtitles(complete_series_path, 1, 1, english)
        for sub in found:
            assert sub.language == Language.fromalpha2("en")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def subtitle():
|
def subtitle():
|
||||||
release_info = """Dune-2021.All.WEBDLL
|
release_info = """Dune-2021.All.WEBDLL
|
||||||
|
@ -143,3 +154,17 @@ def test_download_subtitle_episode(subtitle_episode):
|
||||||
with Subf2mProvider() as provider:
|
with Subf2mProvider() as provider:
|
||||||
provider.download_subtitle(subtitle_episode)
|
provider.download_subtitle(subtitle_episode)
|
||||||
assert subtitle_episode.is_valid()
|
assert subtitle_episode.is_valid()
|
||||||
|
|
||||||
|
|
||||||
|
def test_download_subtitle_episode_with_title():
    """Download an episode subtitle that has an episode title set.

    Builds a subtitle by hand, sets its ``episode_title`` and checks that the
    downloaded content is valid.
    """
    page_link = "https://subf2m.co/subtitles/courage-the-cowardly-dog/english/2232402"
    english = Language.fromalpha2("en")

    sub = Subf2mSubtitle(english, page_link, "Season 3 complete.", 13)
    sub.episode_title = "Feast of the Bullfrogs"

    with Subf2mProvider() as provider:
        provider.download_subtitle(sub)
        assert sub.is_valid()
|
||||||
|
|
Loading…
Reference in New Issue