Refactor Argenteam Provider

* Deprecate text search in favour of IMDB search
* Simplify code
This commit is contained in:
Vitiko 2022-11-13 19:41:42 -04:00
parent 2e4480dd5f
commit 52760d8bc7
3 changed files with 146 additions and 203 deletions

View File

@ -1,21 +1,20 @@
# coding=utf-8
from __future__ import absolute_import
from json import JSONDecodeError
import logging
import os
import io
import time
import urllib.parse
from json import JSONDecodeError
from zipfile import ZipFile
from guessit import guessit
from requests import Session
from subliminal import Episode, Movie
from subliminal.utils import sanitize
from subliminal import Episode
from subliminal import Movie
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.providers.utils import get_archive_from_bytes
from subliminal_patch.providers.utils import get_subtitle_from_archive
from subliminal_patch.providers.utils import update_matches
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language
BASE_URL = "https://argenteam.net"
@ -30,42 +29,31 @@ class ArgenteamSubtitle(Subtitle):
def __init__(self, language, page_link, download_link, release_info, matches):
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link)
self._found_matches = matches
self.page_link = page_link
self.download_link = download_link
self.found_matches = matches
self._release_info = release_info
# Original subtitle filename guessed from the URL
self.release_info = urllib.parse.unquote(self.download_link.split("/")[-1])
self.release_info = release_info
@property
def id(self):
return self.download_link
def get_matches(self, video):
type_ = "episode" if isinstance(video, Episode) else "movie"
update_matches(self._found_matches, video, self.release_info)
self.found_matches |= guess_matches(
video,
guessit(self.release_info, {"type": type_}),
)
self.found_matches |= guess_matches(
video,
guessit(self._release_info, {"type": type_}),
)
return self.found_matches
return self._found_matches
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
provider_name = "argenteam"
# Safe to assume every subtitle from Argenteam is Latam Spanish
languages = {Language("spa", "MX")}
video_types = (Episode, Movie)
subtitle_class = ArgenteamSubtitle
hearing_impaired_verifiable = False
language_list = list(languages)
multi_result_throttle = 2 # seconds
_default_lang = Language("spa", "MX")
def __init__(self):
self.session = Session()
@ -78,31 +66,36 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
def terminate(self):
self.session.close()
def query(self, title, video, titles=None):
def query(self, video):
is_episode = isinstance(video, Episode)
season = episode = None
url = f"{API_URL}/movie"
if is_episode:
season = video.season
episode = video.episode
url = f"{API_URL}/episode"
argenteam_ids = self._search_ids(
title, season=season, episode=episode, titles=titles
)
imdb_id = video.series_imdb_id if is_episode else video.imdb_id
else:
argenteam_ids = self._search_ids(
title, year=video.year, imdb_id=video.imdb_id, titles=titles
)
if not argenteam_ids:
if not imdb_id:
logger.debug("%s doesn't have IMDB ID. Can't search")
return []
language = self.language_list[0]
if is_episode:
argenteam_ids = self._search_ids(
imdb_id, season=video.season, episode=video.episode
)
else:
argenteam_ids = self._search_ids(imdb_id)
if not argenteam_ids:
logger.debug("No IDs found")
return []
return self._parse_subtitles(argenteam_ids, is_episode)
def _parse_subtitles(self, ids, is_episode=True):
movie_kind = "episode" if is_episode else "movie"
subtitles = []
has_multiple_ids = len(argenteam_ids) > 1
for aid in argenteam_ids:
response = self.session.get(url, params={"id": aid}, timeout=10)
for aid in ids:
response = self.session.get(
f"{API_URL}/{movie_kind}", params={"id": aid}, timeout=10
)
response.raise_for_status()
try:
@ -113,81 +106,55 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
if not content or not content.get("releases"):
continue
imdb_id = year = None
returned_title = title
if not is_episode and "info" in content:
imdb_id = content["info"].get("imdb")
year = content["info"].get("year")
returned_title = content["info"].get("title", title)
for r in content["releases"]:
for s in r["subtitles"]:
movie_kind = "episode" if is_episode else "movie"
page_link = f"{BASE_URL}/{movie_kind}/{aid}"
release_info = self._combine_release_info(r)
release_info = self._combine_release_info(r, s)
logger.debug("Got release info: %s", release_info)
download_link = s["uri"].replace("http://", "https://")
matches_ = self._get_query_matches(
video,
movie_kind=movie_kind,
season=season,
episode=episode,
title=returned_title,
year=year,
imdb_id=imdb_id,
tvdb_id=content.get("tvdb"),
)
# Already matched within query
if is_episode:
matches = {"series", "title", "season", "episode", "imdb_id"}
else:
matches = {"title", "year", "imdb_id"}
if matches_ is not None:
subtitles.append(
ArgenteamSubtitle(
language,
page_link,
download_link,
release_info,
matches_,
)
subtitles.append(
ArgenteamSubtitle(
self._default_lang,
page_link,
download_link,
release_info,
matches,
)
if has_multiple_ids:
time.sleep(self.multi_result_throttle)
)
return subtitles
def list_subtitles(self, video, languages):
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series[:2]
else:
titles = [video.title] + video.alternative_titles[:2]
for title in titles:
subs = self.query(title, video, titles=titles)
if subs:
return subs
time.sleep(self.multi_result_throttle)
return []
return self.query(video)
def download_subtitle(self, subtitle):
# download as a zip
logger.info("Downloading subtitle %r", subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
archive = get_archive_from_bytes(r.content)
subtitle.content = get_subtitle_from_archive(archive)
def _search_ids(self, title, **kwargs):
query = title
titles = kwargs.get("titles") or []
def _search_ids(self, identifier, **kwargs):
"""
:param identifier: imdb_id or title (without year)
"""
# Drop only a genuine IMDB "tt" prefix (e.g. "tt0306414" -> "0306414").
# str.lstrip("tt") removes *every* leading "t" character, which would also
# mangle a title identifier such as "titanic" into "itanic".
if identifier.startswith("tt") and identifier[2:].isdigit():
    identifier = identifier[2:]
is_episode = False
query = identifier
if kwargs.get("season") and kwargs.get("episode"):
is_episode = True
query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
query = f"{identifier} S{kwargs['season']:02}E{kwargs['episode']:02}"
logger.debug(f"Searching ID (episode: {is_episode}) for {query}")
logger.debug("Searching ID for %s", query)
r = self.session.get(f"{API_URL}/search", params={"q": query}, timeout=10)
r.raise_for_status()
@ -200,84 +167,27 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
if not results.get("results"):
return []
match_ids = []
for result in results["results"]:
if result["type"] == "movie" and is_episode:
continue
imdb = f"tt{result.get('imdb', 'n/a')}"
if not is_episode and imdb == kwargs.get("imdb_id"):
logger.debug("Movie matched by IMDB ID, taking shortcut")
match_ids = [result["id"]]
break
# advanced title check in case of multiple movie results
title_year = kwargs.get("year") and kwargs.get("title")
if results["total"] > 1 and not is_episode and title_year:
sanitized = sanitize(result["title"])
titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles]
if sanitized not in titles:
continue
match_ids.append(result["id"])
if match_ids:
ids = ", ".join(str(id) for id in match_ids)
logger.debug("Found matching IDs: %s", ids)
else:
logger.debug("Nothing found from %s query", query)
match_ids = [result["id"] for result in results["results"]]
logger.debug("Found matching IDs: %s", match_ids)
return match_ids
def _get_query_matches(self, video, **kwargs):
matches = set()
def _combine_release_info(self, release_dict, subtitle_dict):
releases = [
urllib.parse.unquote(subtitle_dict.get("uri", "Unknown").split("/")[-1])
]
if isinstance(video, Episode) and kwargs.get("movie_kind") == "episode":
if (kwargs.get("tvdb_id") and video.series_tvdb_id) and str(
video.series_tvdb_id
) != str(kwargs.get("tvdb_id")):
logger.debug(
"TVDB ID not matched: %s - %s", kwargs, video.series_tvdb_id
)
return None
combine = [
release_dict.get(key)
for key in ("source", "codec", "tags")
if release_dict.get(key)
]
if video.series and (
sanitize(kwargs.get("title"))
in (
sanitize(name) for name in [video.series] + video.alternative_series
)
):
matches.add("series")
if video.season and kwargs.get("season") == video.season:
matches.add("season")
if video.episode and kwargs.get("episode") == video.episode:
matches.add("episode")
# year (year is not available for series, but we assume it matches)
matches.add("year")
elif isinstance(video, Movie) and kwargs.get("movie_kind") == "movie":
if video.title and (
sanitize(kwargs.get("title"))
in (sanitize(name) for name in [video.title] + video.alternative_titles)
):
matches.add("title")
if video.imdb_id and f"tt{kwargs.get('imdb_id')}" == str(video.imdb_id):
matches.add("imdb_id")
if video.year and kwargs.get("year") == video.year:
matches.add("year")
else:
logger.info(f"{kwargs.get('movie_kind')} is not a valid movie_kind")
return matches
def _combine_release_info(self, release_dict):
keys = ("source", "codec", "tags", "team")
combine = [release_dict.get(key) for key in keys if release_dict.get(key)]
if combine:
return ".".join(combine)
return "Unknown"
r_info = ".".join(combine)
if release_dict.get("team"):
r_info += f"-{release_dict['team']}"
releases.append(r_info)
return "\n".join(releases)

View File

@ -123,6 +123,7 @@ def episodes():
1,
1,
source="Blu-Ray",
series_imdb_id="tt0903747",
release_group="REWARD",
resolution="720p",
video_codec="H.264",

View File

@ -8,14 +8,39 @@ from subliminal_patch.core import Episode
from subzero.language import Language
# For each (IMDB ID, expected ID) pair, a movie search by IMDB ID (passed with
# its "tt" prefix) must return the expected ID as the first result.
@pytest.mark.parametrize(
"imdb_id,expected_id", [("tt0028950", 62790), ("tt0054407", 102006)]
)
def test_search_ids_movie(imdb_id, expected_id):
with ArgenteamProvider() as provider:
ids = provider._search_ids(imdb_id)
assert ids[0] == expected_id
# Passing both season and episode makes _search_ids build an "S01E01"-style
# query; the first ID returned for this show is expected to be 10075.
def test_search_ids_tv_show():
with ArgenteamProvider() as provider:
ids = provider._search_ids("tt0306414", season=1, episode=1)
assert ids[0] == 10075
# ID 10075 is parsed with the default is_episode=True and must yield more
# than one subtitle.
def test_parse_subtitles_episode():
with ArgenteamProvider() as provider:
assert len(provider._parse_subtitles([10075])) > 1
# ID 61 is parsed as a movie (is_episode=False) and must yield more than
# three subtitles.
def test_parse_subtitles_movie():
with ArgenteamProvider() as provider:
assert len(provider._parse_subtitles([61], is_episode=False)) > 3
def test_get_matches_episode(episodes):
episode = episodes["breaking_bad_s01e01"]
subtitle = ArgenteamSubtitle(
Language.fromalpha2("es"),
None,
"https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD",
"BluRay x264 720p",
{"title", "season", "episode", "imdb_id"},
"Breaking.Bad.(2008).S01E01-Pilot.BluRay.x264.720p-REWARD\nBluRay x264 720p",
{"series", "title", "season", "episode", "imdb_id"},
)
matches = subtitle.get_matches(episode)
assert matches == {
@ -52,10 +77,10 @@ def test_get_matches_movie(movies):
"resolution",
"edition",
"video_codec",
"streaming_service",
}
@pytest.mark.vcr
def test_list_subtitles_movie(movies):
item = movies["dune"]
with ArgenteamProvider() as provider:
@ -69,7 +94,20 @@ def test_list_subtitles_movie(movies):
assert any(expected == sub.download_link for sub in subtitles)
@pytest.mark.vcr
# With the movie's imdb_id cleared, list_subtitles must return an empty
# (falsy) result.
# NOTE(review): this mutates the object taken from the `movies` fixture;
# confirm the fixture is rebuilt per test so other tests are unaffected.
def test_list_subtitles_movie_no_imdb(movies):
item = movies["dune"]
item.imdb_id = None
with ArgenteamProvider() as provider:
assert not provider.list_subtitles(item, {Language("spa", "MX")})
# The movie's imdb_id is replaced with "tt29318321832" (presumably an ID with
# no match on the site -- confirm); list_subtitles must return an empty
# (falsy) result.
def test_list_subtitles_movie_not_found(movies):
item = movies["dune"]
item.imdb_id = "tt29318321832"
with ArgenteamProvider() as provider:
assert not provider.list_subtitles(item, {Language("spa", "MX")})
def test_list_subtitles_episode(episodes):
item = episodes["breaking_bad_s01e01"]
with ArgenteamProvider() as provider:
@ -82,29 +120,23 @@ def test_list_subtitles_episode(episodes):
assert any(expected == sub.download_link for sub in subtitles)
@pytest.mark.vcr
# With the episode's series_imdb_id cleared, list_subtitles must return an
# empty (falsy) result.
# NOTE(review): this mutates the object taken from the `episodes` fixture;
# confirm the fixture is rebuilt per test so other tests are unaffected.
def test_list_subtitles_episode_no_imdb_id(episodes):
item = episodes["breaking_bad_s01e01"]
item.series_imdb_id = None
with ArgenteamProvider() as provider:
assert not provider.list_subtitles(item, {Language("spa", "MX")})
# The episode's series_imdb_id is replaced with "tt29318321832" (presumably an
# ID with no match on the site -- confirm); list_subtitles must return an
# empty (falsy) result.
def test_list_subtitles_episode_not_found(episodes):
item = episodes["breaking_bad_s01e01"]
item.series_imdb_id = "tt29318321832"
with ArgenteamProvider() as provider:
assert not provider.list_subtitles(item, {Language("spa", "MX")})
def test_download_subtitle(episodes):
item = episodes["breaking_bad_s01e01"]
with ArgenteamProvider() as provider:
subtitles = provider.list_subtitles(item, {Language("spa", "MX")})
subtitle = subtitles[0]
provider.download_subtitle(subtitle)
assert subtitle.content is not None
@pytest.mark.vcr
def test_list_subtitles_episode_with_tvdb():
video = Episode(
"Severance.S01E01.720p.BluRay.X264-REWARD.mkv",
"Severance",
1,
1,
source="Blu-Ray",
release_group="REWARD",
resolution="720p",
video_codec="H.264",
series_tvdb_id=371980,
)
with ArgenteamProvider() as provider:
subtitles = provider.list_subtitles(video, {Language("spa", "MX")})
assert len(subtitles) == 0
provider.download_subtitle(subtitles[0])
assert subtitles[0].is_valid()