Subf2m Provider: add more improvements

* Retry queries when subf2m server returns 503
* Add support for episode titles in season packs
* Add support for season packs with "Complete Series" titles
This commit is contained in:
Vitiko 2022-10-26 20:27:49 -04:00
parent 254df4446f
commit fe8101dcee
2 changed files with 77 additions and 17 deletions

View File

@ -1,14 +1,15 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from difflib import SequenceMatcher
import functools import functools
import logging import logging
import urllib.parse
import re import re
import time
import urllib.parse
from bs4 import BeautifulSoup as bso from bs4 import BeautifulSoup as bso
from guessit import guessit from guessit import guessit
from requests import Session from requests import Session
from difflib import SequenceMatcher
from subliminal_patch.core import Episode from subliminal_patch.core import Episode
from subliminal_patch.core import Movie from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled from subliminal_patch.exceptions import APIThrottled
@ -31,6 +32,7 @@ class Subf2mSubtitle(Subtitle):
self.release_info = release_info self.release_info = release_info
self.episode_number = episode_number self.episode_number = episode_number
self.episode_title = None
self._matches = set( self._matches = set(
("title", "year") ("title", "year")
@ -115,7 +117,7 @@ class Subf2mProvider(Provider):
provider_name = "subf2m" provider_name = "subf2m"
_movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$") _movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) season( \((\d{4})\))?$") _tv_show_title_regex = re.compile(r"^(.+?) - (.*?) (season|series)( \((\d{4})\))?$")
_supported_languages = {} _supported_languages = {}
_supported_languages["brazillian-portuguese"] = Language("por", "BR") _supported_languages["brazillian-portuguese"] = Language("por", "BR")
@ -138,12 +140,34 @@ class Subf2mProvider(Provider):
def terminate(self): def terminate(self):
self._session.close() self._session.close()
def _safe_get_text(self, url, retry=3, default_return=""):
req = None
for n in range(retry):
req = self._session.get(url, stream=True)
# Sometimes subf2m will return a 503 code. This error usually disappears
# retrying the query
if req.status_code == 503:
logger.debug("503 returned. Trying again [%d] in 3 seconds", n + 1)
time.sleep(3)
continue
else:
req.raise_for_status()
break
if req is not None:
return "\n".join(
line for line in req.iter_lines(decode_unicode=True) if line
)
return default_return
def _gen_results(self, query): def _gen_results(self, query):
req = self._session.get( query = urllib.parse.quote(query)
f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l=",
stream=True, url = f"{_BASE_URL}/subtitles/searchbytitle?query={query}&l="
)
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line) text = self._safe_get_text(url)
soup = bso(text, "html.parser") soup = bso(text, "html.parser")
for title in soup.select("li div[class='title'] a"): for title in soup.select("li div[class='title'] a"):
@ -189,12 +213,19 @@ class Subf2mProvider(Provider):
results = [] results = []
for result in self._gen_results(title): for result in self._gen_results(title):
text = result.text.lower() text = result.text.lower()
match = self._tv_show_title_regex.match(text) match = self._tv_show_title_regex.match(text)
if not match: if not match:
logger.debug("Series title not matched: %s", text)
continue continue
else:
logger.debug("Series title matched: %s", text)
match_title = match.group(1) match_title = match.group(1)
match_season = match.group(2) match_season = match.group(2)
if season_str == match_season:
# Match "complete series" titles as they usually contain season packs
if season_str == match_season or match_season == "complete":
results.append( results.append(
{ {
"href": result.get("href"), "href": result.get("href"),
@ -223,7 +254,9 @@ class Subf2mProvider(Provider):
return subtitles return subtitles
def _find_episode_subtitles(self, path, season, episode, language): def _find_episode_subtitles(
self, path, season, episode, language, episode_title=None
):
soup = self._get_subtitle_page_soup(path, language) soup = self._get_subtitle_page_soup(path, language)
subtitles = [] subtitles = []
@ -258,6 +291,8 @@ class Subf2mProvider(Provider):
if subtitle is None: if subtitle is None:
continue continue
subtitle.episode_title = episode_title
logger.debug("Found subtitle: %s", subtitle) logger.debug("Found subtitle: %s", subtitle)
subtitles.append(subtitle) subtitles.append(subtitle)
@ -266,8 +301,7 @@ class Subf2mProvider(Provider):
def _get_subtitle_page_soup(self, path, language): def _get_subtitle_page_soup(self, path, language):
language_path = self._supported_languages_reversed[language] language_path = self._supported_languages_reversed[language]
req = self._session.get(f"{_BASE_URL}{path}/{language_path}", stream=True) text = self._safe_get_text(f"{_BASE_URL}{path}/{language_path}")
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
return bso(text, "html.parser") return bso(text, "html.parser")
@ -289,7 +323,7 @@ class Subf2mProvider(Provider):
if is_episode: if is_episode:
subtitles.extend( subtitles.extend(
self._find_episode_subtitles( self._find_episode_subtitles(
result, video.season, video.episode, language result, video.season, video.episode, language, video.title
) )
) )
else: else:
@ -300,14 +334,13 @@ class Subf2mProvider(Provider):
def download_subtitle(self, subtitle): def download_subtitle(self, subtitle):
# TODO: add MustGetBlacklisted support # TODO: add MustGetBlacklisted support
req = self._session.get(subtitle.page_link, stream=True) text = self._safe_get_text(subtitle.page_link)
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
soup = bso(text, "html.parser") soup = bso(text, "html.parser")
try: try:
download_url = _BASE_URL + str( download_url = _BASE_URL + str(
soup.select_one("a[id='downloadButton']")["href"] # type: ignore soup.select_one("a[id='downloadButton']")["href"] # type: ignore
) )
except (AttributeError, KeyError): except (AttributeError, KeyError, TypeError):
raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}") raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")
downloaded = self._session.get(download_url, allow_redirects=True) downloaded = self._session.get(download_url, allow_redirects=True)
@ -318,7 +351,9 @@ class Subf2mProvider(Provider):
raise APIThrottled(f"Invalid archive: {subtitle.page_link}") raise APIThrottled(f"Invalid archive: {subtitle.page_link}")
subtitle.content = get_subtitle_from_archive( subtitle.content = get_subtitle_from_archive(
archive, episode=subtitle.episode_number archive,
episode=subtitle.episode_number,
episode_title=subtitle.episode_title,
) )

View File

@ -33,6 +33,7 @@ def test_search_movie(movies, title, year, expected_url):
("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"), ("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"),
("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"), ("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"),
("The Bear", 1, "/subtitles/the-bear-first-season"), ("The Bear", 1, "/subtitles/the-bear-first-season"),
("Courage the Cowardly Dog", 1, "/subtitles/courage-the-cowardly-dog"),
], ],
) )
def test_search_tv_show_season(episodes, title, season, expected_url): def test_search_tv_show_season(episodes, title, season, expected_url):
@ -62,6 +63,16 @@ def test_find_episode_subtitles(language):
assert sub.language == language assert sub.language == language
def test_fint_episode_subtitles_from_complete_series_path(episodes):
    """Subtitles found under a "complete series" path carry the requested language.

    The ``episodes`` fixture is requested but not read by the test body. The
    loop asserts nothing when the provider returns no subtitles.
    """
    english = Language.fromalpha2("en")
    series_path = "/subtitles/courage-the-cowardly-dog"

    with Subf2mProvider() as provider:
        found = provider._find_episode_subtitles(series_path, 1, 1, english)
        for sub in found:
            assert sub.language == english
@pytest.fixture @pytest.fixture
def subtitle(): def subtitle():
release_info = """Dune-2021.All.WEBDLL release_info = """Dune-2021.All.WEBDLL
@ -143,3 +154,17 @@ def test_download_subtitle_episode(subtitle_episode):
with Subf2mProvider() as provider: with Subf2mProvider() as provider:
provider.download_subtitle(subtitle_episode) provider.download_subtitle(subtitle_episode)
assert subtitle_episode.is_valid() assert subtitle_episode.is_valid()
def test_download_subtitle_episode_with_title():
    """Download a subtitle whose ``episode_title`` is set and check it is valid."""
    page_link = "https://subf2m.co/subtitles/courage-the-cowardly-dog/english/2232402"
    language = Language.fromalpha2("en")

    sub = Subf2mSubtitle(language, page_link, "Season 3 complete.", 13)
    # The title is assigned after construction, exactly as the provider does it.
    sub.episode_title = "Feast of the Bullfrogs"

    with Subf2mProvider() as provider:
        provider.download_subtitle(sub)
        assert sub.is_valid()