Subf2m Provider: add more improvements

* Retry queries when subf2m server returns 503
* Add support for episode titles in season packs
* Add support for season packs with "Complete Series" titles
This commit is contained in:
Vitiko 2022-10-26 20:27:49 -04:00
parent 254df4446f
commit fe8101dcee
2 changed files with 77 additions and 17 deletions

View File

@ -1,14 +1,15 @@
# -*- coding: utf-8 -*-
from difflib import SequenceMatcher
import functools
import logging
import urllib.parse
import re
import time
import urllib.parse
from bs4 import BeautifulSoup as bso
from guessit import guessit
from requests import Session
from difflib import SequenceMatcher
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled
@ -31,6 +32,7 @@ class Subf2mSubtitle(Subtitle):
self.release_info = release_info
self.episode_number = episode_number
self.episode_title = None
self._matches = set(
("title", "year")
@ -115,7 +117,7 @@ class Subf2mProvider(Provider):
provider_name = "subf2m"
_movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) season( \((\d{4})\))?$")
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) (season|series)( \((\d{4})\))?$")
_supported_languages = {}
_supported_languages["brazillian-portuguese"] = Language("por", "BR")
@ -138,12 +140,34 @@ class Subf2mProvider(Provider):
def terminate(self):
    """Close the HTTP session held by this provider."""
    session = self._session
    session.close()
def _safe_get_text(self, url, retry=3, default_return=""):
    """Fetch *url* and return its non-empty lines joined with newlines.

    subf2m sometimes answers with a 503 that usually goes away when the
    query is repeated, so a 503 response is retried up to *retry* times
    with a 3 second pause between attempts.

    :param url: address to request through the provider session.
    :param retry: maximum number of attempts.
    :param default_return: value returned when no attempt succeeded
        (every attempt returned 503, or *retry* is zero or negative).
    :return: the response text without blank lines, or *default_return*.
    :raises requests.HTTPError: via ``raise_for_status`` for any error
        status other than 503.
    """
    for attempt in range(1, retry + 1):
        req = self._session.get(url, stream=True)

        if req.status_code != 503:
            req.raise_for_status()
            return "\n".join(
                line for line in req.iter_lines(decode_unicode=True) if line
            )

        # The request was streamed and the body is not going to be read,
        # so close the response to release its connection.
        req.close()

        # Only wait when another attempt will actually follow.
        if attempt < retry:
            logger.debug("503 returned. Trying again [%d] in 3 seconds", attempt)
            time.sleep(3)

    # Every attempt returned 503 (or no attempt was made): hand back the
    # fallback value instead of the body of the error page.
    logger.debug("No valid response after %d attempts: %s", retry, url)
    return default_return
def _gen_results(self, query):
req = self._session.get(
f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l=",
stream=True,
)
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
query = urllib.parse.quote(query)
url = f"{_BASE_URL}/subtitles/searchbytitle?query={query}&l="
text = self._safe_get_text(url)
soup = bso(text, "html.parser")
for title in soup.select("li div[class='title'] a"):
@ -189,12 +213,19 @@ class Subf2mProvider(Provider):
results = []
for result in self._gen_results(title):
text = result.text.lower()
match = self._tv_show_title_regex.match(text)
if not match:
logger.debug("Series title not matched: %s", text)
continue
else:
logger.debug("Series title matched: %s", text)
match_title = match.group(1)
match_season = match.group(2)
if season_str == match_season:
# Match "complete series" titles as they usually contain season packs
if season_str == match_season or match_season == "complete":
results.append(
{
"href": result.get("href"),
@ -223,7 +254,9 @@ class Subf2mProvider(Provider):
return subtitles
def _find_episode_subtitles(self, path, season, episode, language):
def _find_episode_subtitles(
self, path, season, episode, language, episode_title=None
):
soup = self._get_subtitle_page_soup(path, language)
subtitles = []
@ -258,6 +291,8 @@ class Subf2mProvider(Provider):
if subtitle is None:
continue
subtitle.episode_title = episode_title
logger.debug("Found subtitle: %s", subtitle)
subtitles.append(subtitle)
@ -266,8 +301,7 @@ class Subf2mProvider(Provider):
def _get_subtitle_page_soup(self, path, language):
# Look up the site path segment for `language`, fetch the page at
# `{_BASE_URL}{path}/{language_path}` and return it parsed with the
# "html.parser" backend.
language_path = self._supported_languages_reversed[language]
# NOTE(review): the two statements below and the `_safe_get_text` call after
# them all produce `text` for the same URL. This looks like the removed and
# added lines of a diff shown together -- confirm which version is current.
req = self._session.get(f"{_BASE_URL}{path}/{language_path}", stream=True)
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
text = self._safe_get_text(f"{_BASE_URL}{path}/{language_path}")
return bso(text, "html.parser")
@ -289,7 +323,7 @@ class Subf2mProvider(Provider):
if is_episode:
subtitles.extend(
self._find_episode_subtitles(
result, video.season, video.episode, language
result, video.season, video.episode, language, video.title
)
)
else:
@ -300,14 +334,13 @@ class Subf2mProvider(Provider):
def download_subtitle(self, subtitle):
# TODO: add MustGetBlacklisted support
req = self._session.get(subtitle.page_link, stream=True)
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
text = self._safe_get_text(subtitle.page_link)
soup = bso(text, "html.parser")
try:
download_url = _BASE_URL + str(
soup.select_one("a[id='downloadButton']")["href"] # type: ignore
)
except (AttributeError, KeyError):
except (AttributeError, KeyError, TypeError):
raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")
downloaded = self._session.get(download_url, allow_redirects=True)
@ -318,7 +351,9 @@ class Subf2mProvider(Provider):
raise APIThrottled(f"Invalid archive: {subtitle.page_link}")
subtitle.content = get_subtitle_from_archive(
archive, episode=subtitle.episode_number
archive,
episode=subtitle.episode_number,
episode_title=subtitle.episode_title,
)

View File

@ -33,6 +33,7 @@ def test_search_movie(movies, title, year, expected_url):
("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"),
("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"),
("The Bear", 1, "/subtitles/the-bear-first-season"),
("Courage the Cowardly Dog", 1, "/subtitles/courage-the-cowardly-dog"),
],
)
def test_search_tv_show_season(episodes, title, season, expected_url):
@ -62,6 +63,16 @@ def test_find_episode_subtitles(language):
assert sub.language == language
def test_fint_episode_subtitles_from_complete_series_path(episodes):
    """Subtitles found under a "complete series" path carry the requested language."""
    english = Language.fromalpha2("en")
    series_path = "/subtitles/courage-the-cowardly-dog"

    with Subf2mProvider() as provider:
        # Season 1, episode 1 looked up on a page that is not split by season.
        found = provider._find_episode_subtitles(series_path, 1, 1, english)
        for sub in found:
            assert sub.language == english
@pytest.fixture
def subtitle():
release_info = """Dune-2021.All.WEBDLL
@ -143,3 +154,17 @@ def test_download_subtitle_episode(subtitle_episode):
with Subf2mProvider() as provider:
provider.download_subtitle(subtitle_episode)
assert subtitle_episode.is_valid()
def test_download_subtitle_episode_with_title():
    """Download a season-pack subtitle that has an episode title set and check it is valid."""
    page_link = "https://subf2m.co/subtitles/courage-the-cowardly-dog/english/2232402"
    titled_sub = Subf2mSubtitle(
        Language.fromalpha2("en"),
        page_link,
        "Season 3 complete.",
        13,
    )
    # The title is set after construction, as the provider does for found subtitles.
    titled_sub.episode_title = "Feast of the Bullfrogs"

    with Subf2mProvider() as provider:
        provider.download_subtitle(titled_sub)
        assert titled_sub.is_valid()