2020-09-30 17:39:25 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2020-10-11 19:40:07 +00:00
|
|
|
import io
|
2020-09-30 17:39:25 +00:00
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import zipfile
|
|
|
|
|
2020-10-11 19:40:07 +00:00
|
|
|
import rarfile
|
2020-09-30 17:39:25 +00:00
|
|
|
from requests import Session
|
2020-10-27 23:17:40 +00:00
|
|
|
from guessit import guessit
|
2020-10-11 19:40:07 +00:00
|
|
|
from subliminal import Episode, Movie
|
2020-09-30 17:39:25 +00:00
|
|
|
from subliminal.exceptions import ServiceUnavailable
|
2020-10-11 19:40:07 +00:00
|
|
|
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending
|
|
|
|
from subliminal_patch.exceptions import APIThrottled
|
2020-09-30 17:39:25 +00:00
|
|
|
from subliminal_patch.providers import Provider
|
2020-10-27 23:17:40 +00:00
|
|
|
from subliminal_patch.subtitle import Subtitle, guess_matches
|
2020-10-11 19:40:07 +00:00
|
|
|
from subzero.language import Language
|
2020-09-30 17:39:25 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the API; the search and download endpoints are built from it.
SERVER_URL = "http://sapidb.caretas.club"
# Public site; attached to every subtitle as its page link.
PAGE_URL = "https://sucha.caretas.club"

# Lower-case markers: archive members whose lower-cased name contains any of
# these are skipped when picking the subtitle file to return.
UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.")


class SuchaSubtitle(Subtitle):
    """Subtitle entry returned by the Sucha provider.

    Besides the base subtitle state it keeps the identifiers needed to
    download the file later (``download_id`` / ``download_type``) and the
    two free-text descriptions (file name and release string) that are fed
    to guessit when computing matches.
    """

    provider_name = "sucha"
    hash_verifiable = False

    def __init__(
        self,
        language,
        release_info,
        filename,
        download_id,
        download_type,
        matches,
    ):
        """Store the search result's metadata.

        :param language: language of the subtitle.
        :param release_info: release description reported for the subtitle.
        :param filename: file name reported for the subtitle.
        :param download_id: identifier sent as ``id`` when downloading.
        :param download_type: value sent as ``type`` when downloading.
        :param matches: set of matches already established by the search.
        """
        super().__init__(language, hearing_impaired=False, page_link=PAGE_URL)
        self.download_id = download_id
        self.download_type = download_type
        self.language = language
        # The raw release string is kept separately for guessit, because
        # ``release_info`` below may end up holding the file name instead.
        self.guessed_release_info = release_info
        self.filename = filename
        # Expose whichever description is longer; on a tie the file name wins.
        if len(release_info) > len(filename):
            self.release_info = release_info
        else:
            self.release_info = filename
        self.found_matches = matches

    @property
    def id(self):
        """Identifier of the subtitle: its download id."""
        return self.download_id

    def get_matches(self, video):
        """Extend and return the stored matches using guessit.

        Both the file name and the release string are parsed with guessit
        (typed as episode or movie depending on ``video``) and the resulting
        matches are merged into ``found_matches``.
        """
        kind = "movie"
        if isinstance(video, Episode):
            kind = "episode"

        for description in (self.filename, self.guessed_release_info):
            self.found_matches |= guess_matches(
                video,
                guessit(description, {"type": kind}),
            )
        return self.found_matches


class SuchaProvider(Provider):
    """Sucha Provider.

    Searches the API at ``SERVER_URL`` for movie and episode subtitles and
    downloads them as rar/zip archives, from which a single subtitle file is
    extracted.
    """

    # This is temporary. Castilian spanish subtitles may exist, but are rare
    # and currently impossible to guess from the API.
    languages = {Language("spa", "MX")}
    language_list = list(languages)
    video_types = (Episode, Movie)

    def initialize(self):
        """Open the HTTP session and set its User-Agent header.

        The User-Agent is taken from the ``SZ_USER_AGENT`` environment
        variable, falling back to ``"Sub-Zero/2"``.
        """
        self.session = Session()
        self.session.headers.update(
            {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
        )

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, languages, video):
        """Search the API for subtitles matching ``video``.

        :param languages: not consulted here; every result is tagged with the
            provider's single supported language.
        :param video: the episode or movie to search for.
        :return: list of :class:`SuchaSubtitle` (empty when nothing is found).
        :raises requests.HTTPError: when the API answers with an error status.
        """
        movie_year = video.year or "0"
        is_episode = isinstance(video, Episode)
        type_str = "episode" if is_episode else "movie"
        language = self.language_list[0]

        if is_episode:
            q = {"query": f"{video.series} S{video.season:02}E{video.episode:02}"}
        else:
            q = {"query": video.title, "year": movie_year}

        logger.debug("Searching subtitles: %s", q)
        result = self.session.get(f"{SERVER_URL}/{type_str}", params=q, timeout=10)
        result.raise_for_status()

        results = result.json()
        # A dict payload (instead of a list of items) is treated as
        # "no subtitles found".
        if isinstance(results, dict):
            logger.debug("No subtitles found")
            return []

        # Loop invariants. ``video.title`` may be None (e.g. an episode whose
        # title is unknown), so it must not be lower-cased unconditionally.
        wanted_title = (video.title or "").lower()
        wanted_query = q["query"].lower()
        # Compare years as strings on both sides: a str never equals an int,
        # so comparing against a raw integer year could never match.
        wanted_year = None if video.year is None else str(video.year)

        subtitles = []
        for item in results:
            matches = set()
            # ``or`` also covers keys that are present but null.
            title = (item.get("title") or "").lower()
            alt_title = (item.get("alt_title") or title).lower()
            candidates = (title, alt_title)

            # An empty wanted title would be "contained" in anything; skip it.
            if wanted_title and any(wanted_title in name for name in candidates):
                matches.add("title")

            if wanted_year is not None and str(item.get("year")) == wanted_year:
                matches.add("year")

            # For episodes, a hit on the full "<series> SxxEyy" query string
            # is taken as confirming every episode-level property at once.
            if is_episode and any(wanted_query in name for name in candidates):
                matches.update(("title", "series", "season", "episode", "year"))

            subtitles.append(
                SuchaSubtitle(
                    language,
                    item["release"],
                    item["filename"],
                    str(item["id"]),
                    type_str,
                    matches,
                )
            )
        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found for ``video`` (delegates to ``query``)."""
        return self.query(languages, video)

    def _get_archive(self, content):
        """Wrap downloaded bytes in the matching archive object.

        :param content: raw bytes of the downloaded archive.
        :return: a ``rarfile.RarFile`` or ``zipfile.ZipFile`` over ``content``.
        :raises APIThrottled: when the content is neither rar nor zip.
        """
        archive_stream = io.BytesIO(content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            return rarfile.RarFile(archive_stream)

        if zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            return zipfile.ZipFile(archive_stream)

        raise APIThrottled("Unsupported compressed format")

    def get_file(self, archive):
        """Return the content of the first suitable subtitle in ``archive``.

        Members are skipped when their base name starts with a dot, when they
        do not end with one of ``SUBTITLE_EXTENSIONS``, or when their name
        contains one of the ``UNDESIRED_FILES`` markers.

        :raises APIThrottled: when no member qualifies.
        """
        for name in archive.namelist():
            # Hidden files (base name starting with a dot).
            if os.path.basename(name).startswith("."):
                continue

            lowered = name.lower()
            if not lowered.endswith(SUBTITLE_EXTENSIONS):
                continue

            if any(undesired in lowered for undesired in UNDESIRED_FILES):
                continue

            logger.debug("Returning from archive: %s", name)
            return archive.read(name)

        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        """Download the archive for ``subtitle`` and fill in its content.

        The extracted subtitle bytes are passed through ``fix_line_ending``
        and stored on ``subtitle.content``.

        :raises requests.HTTPError: when the download request fails.
        :raises APIThrottled: when the archive is unsupported or contains no
            usable subtitle file.
        """
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(
            f"{SERVER_URL}/download",
            params={"id": subtitle.download_id, "type": subtitle.download_type},
            timeout=10,
        )
        response.raise_for_status()
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)