bazarr/libs/subliminal_patch/providers/ktuvit.py

501 lines
17 KiB
Python
Raw Normal View History

2021-08-31 17:26:16 +00:00
# -*- coding: utf-8 -*-
import io
import logging
import os
import json
from subzero.language import Language
from guessit import guessit
from requests import Session
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal.subtitle import fix_line_ending
from subliminal import __short_version__
from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.exceptions import AuthenticationError, ConfigurationError
from subliminal_patch.subtitle import guess_matches
from subliminal_patch.utils import sanitize
from subliminal.video import Episode, Movie
logger = logging.getLogger(__name__)
class KtuvitSubtitle(Subtitle):
    """A single subtitle entry listed by the Ktuvit provider."""

    provider_name = "ktuvit"

    def __init__(
        self,
        language,
        hearing_impaired,
        page_link,
        series,
        season,
        episode,
        title,
        imdb_id,
        ktuvit_id,
        subtitle_id,
        release,
    ):
        """Keep the Ktuvit-specific metadata next to the base subtitle fields.

        :param language: language of the subtitle (forwarded to the base class).
        :param hearing_impaired: hearing-impaired flag (forwarded to the base class).
        :param page_link: link to the subtitle page (forwarded to the base class).
        :param series: series name stored on the subtitle.
        :param season: season number compared against the video in `get_matches`.
        :param episode: episode number compared against the video in `get_matches`.
        :param title: title compared (sanitized) against the video's names.
        :param imdb_id: IMDB ID compared against the video's series IMDB ID.
        :param ktuvit_id: identifier of the film/series on Ktuvit.
        :param subtitle_id: identifier of this subtitle; also used as `id`.
        :param release: release name; parsed with guessit in `get_matches`.
        """
        super().__init__(language, hearing_impaired, page_link)
        self.series, self.season, self.episode = series, season, episode
        self.title = title
        self.imdb_id = imdb_id
        self.ktuvit_id = ktuvit_id
        self.subtitle_id = subtitle_id
        self.release = release

    def __repr__(self):
        """Return a debug representation with id, page link, language and encoding."""
        return "<{} [{}] {!r} [{}:{}]>".format(
            type(self).__name__,
            self.subtitle_id,
            self.page_link,
            self.language,
            self._guessed_encoding,
        )

    @property
    def id(self):
        """The subtitle identifier, always as a string."""
        return str(self.subtitle_id)

    @property
    def release_info(self):
        """The release name this subtitle was published for."""
        return self.release

    def get_matches(self, video):
        """Compute which properties of `video` this subtitle matches.

        :param video: the `Episode` or `Movie` to compare against.
        :return: names of the matched properties.
        :rtype: set
        """

        def title_is_one_of(candidates):
            # Sanitize our own title first, then compare lazily against each
            # sanitized candidate name.
            wanted = sanitize(self.title)
            return any(sanitize(candidate) == wanted for candidate in candidates)

        found = set()

        if isinstance(video, Episode):
            # series name (primary or any alternative)
            if video.series and title_is_one_of(
                [video.series] + video.alternative_series
            ):
                found.add("series")

            # season number
            if video.season and video.season == self.season:
                found.add("season")

            # episode number
            if video.episode and video.episode == self.episode:
                found.add("episode")

            # IMDB ID of the series
            if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
                found.add("series_imdb_id")

            # whatever guessit can extract from the release name
            episode_guess = guessit(self.release, {"type": "episode"})
            found.update(guess_matches(video, episode_guess))

        elif isinstance(video, Movie):
            # whatever guessit can extract from the release name
            movie_guess = guessit(self.release, {"type": "movie"})
            found.update(guess_matches(video, movie_guess))

            # movie title (primary or any alternative)
            if video.title and title_is_one_of(
                [video.title] + video.alternative_titles
            ):
                found.add("title")

        return found
class KtuvitProvider(Provider):
    """Ktuvit Provider.

    Lists and downloads Hebrew subtitles from ktuvit.me. `query` returns no
    results unless `initialize` managed to log in with the configured
    credentials.
    """

    languages = {Language(l) for l in ["heb"]}
    server_url = "https://www.ktuvit.me/"
    sign_in_url = "Services/MembershipService.svc/Login"
    search_url = "Services/ContentProvider.svc/SearchPage_search"
    movie_info_url = "MovieInfo.aspx?ID="
    episode_info_url = "Services/GetModuleAjax.ashx?"
    request_download_id_url = "Services/ContentProvider.svc/RequestSubtitleDownload"
    download_link = "Services/DownloadFile.ashx?DownloadIdentifier="
    subtitle_class = KtuvitSubtitle
    # Text of the single table cell the site returns when an episode has no subtitles.
    no_subtitle_str = 'אין כתוביות'

    _tmdb_api_key = "a51ee051bcd762543373903de296e0a3"

    def __init__(self, email=None, hashed_password=None):
        """Store the credentials; no network access happens here.

        :param str email: account e-mail (optional).
        :param str hashed_password: hashed account password (optional).
        :raise ConfigurationError: if only one of the two credentials is given.
        """
        if any((email, hashed_password)) and not all((email, hashed_password)):
            raise ConfigurationError("Email and Hashed Password must be specified")

        self.email = email
        self.hashed_password = hashed_password
        self.logged_in = False
        self.session = None
        self.login_cookie = None

    def initialize(self):
        """Open the HTTP session and, when credentials are configured, log in.

        On success `logged_in` is set to True and `login_cookie` holds the value
        of the ``Login`` cookie from the response.

        :raise AuthenticationError: if the provider rejects the login or does not
            send a usable ``Login`` cookie.
        """
        self.session = Session()

        if not (self.email and self.hashed_password):
            return

        logger.info("Logging in")

        data = {"request": {"Email": self.email, "Password": self.hashed_password}}

        self.session.headers["Accept-Encoding"] = "gzip"
        self.session.headers["Accept-Language"] = "en-us,en;q=0.5"
        self.session.headers["Pragma"] = "no-cache"
        self.session.headers["Cache-Control"] = "no-cache"
        self.session.headers["Content-Type"] = "application/json"
        # This must be an assignment: written with ":" it is only an annotation
        # and the header is never set.
        self.session.headers["User-Agent"] = os.environ.get(
            "SZ_USER_AGENT", "Sub-Zero/2"
        )

        r = self.session.post(
            self.server_url + self.sign_in_url,
            json=data,
            allow_redirects=False,
            timeout=10,
        )

        if not r.content:
            # An empty body is treated as "not logged in" without raising.
            return

        is_success = False
        try:
            is_success = self.parse_d_response(
                r, "IsSuccess", False, "Authentication to the provider"
            )
        except json.decoder.JSONDecodeError:
            logger.info("Failed to Login to Ktuvit")

        if not is_success:
            try:
                error_message = self.parse_d_response(r, "ErrorMessage", "[None]")
            except json.decoder.JSONDecodeError:
                raise AuthenticationError(
                    "Error Logging in to Ktuvit Provider: " + str(r.content)
                )
            # str() guards against a non-string ErrorMessage (e.g. JSON null).
            raise AuthenticationError(
                "Error Logging in to Ktuvit Provider: " + str(error_message)
            )

        # A missing Set-Cookie header must surface as an authentication failure,
        # not as a KeyError.
        cookie_split = (r.headers.get("set-cookie") or "").split("Login=")
        if len(cookie_split) != 2:
            self.logged_in = False
            raise AuthenticationError(
                "Login Failed, didn't receive valid cookie in response"
            )

        self.login_cookie = cookie_split[1].split(";")[0]
        logger.debug("Logged in with cookie: %s", self.login_cookie)

        self.logged_in = True

    def terminate(self):
        """Close the HTTP session, if one was opened by `initialize`."""
        if self.session is not None:
            self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_imdb_id(self, title, year, is_movie):
        """Search the IMDB ID for the given `title` and `year` through TMDB.

        :param str title: title to search for.
        :param int year: year to search for (or 0 if not relevant).
        :param bool is_movie: If True the TMDB "movie" category is searched,
            otherwise the "tv" category.
        :return: the IMDB ID for the given title and year (or None if not found).
        :rtype: str
        """
        # make the search
        logger.info(
            "Searching IMDB ID for %r%r",
            title,
            "" if not year else " ({})".format(year),
        )
        category = "movie" if is_movie else "tv"
        title = title.replace("'", "")

        # Let requests build the query string so characters such as "&" or "#"
        # in a title are percent-encoded instead of corrupting the URL.
        search_params = {
            "api_key": self._tmdb_api_key,
            "query": title,
            "language": "en",
        }
        if year:
            search_params["year"] = year

        # get TMDB ID first
        r = self.session.get(
            "http://api.tmdb.org/3/search/{}".format(category),
            params=search_params,
            timeout=10,
        )
        r.raise_for_status()
        tmdb_results = r.json().get("results")
        if not tmdb_results:
            return None

        tmdb_id = tmdb_results[0].get("id")
        if not tmdb_id:
            return None

        # get actual IMDB ID from TMDB
        r = self.session.get(
            "http://api.tmdb.org/3/{}/{}{}".format(
                category,
                tmdb_id,
                "" if is_movie else "/external_ids",
            ),
            params={"api_key": self._tmdb_api_key, "language": "en"},
            timeout=10,
        )
        r.raise_for_status()
        imdb_id = r.json().get("imdb_id")
        return str(imdb_id) if imdb_id else None

    def query(
        self, title, season=None, episode=None, year=None, filename=None, imdb_id=None
    ):
        """Search Ktuvit for subtitles of the given title.

        The request is treated as a movie search unless both `season` and
        `episode` are truthy. Only search results whose IMDB link matches
        `imdb_id` (looked up through TMDB when not supplied) are used.

        :param str title: movie or series title.
        :param int season: season number (series only).
        :param int episode: episode number (series only).
        :param int year: release year, sent with the search when given.
        :param str filename: unused; kept for interface compatibility.
        :param str imdb_id: known IMDB ID, if any.
        :return: the subtitles found (empty when not logged in or nothing matched).
        :rtype: list of :class:`KtuvitSubtitle`
        """
        if not self.logged_in:
            logger.info("Not logged in to Ktuvit. Returning 0 results")
            return []

        # search for the IMDB ID if needed.
        is_movie = not (season and episode)
        imdb_id = imdb_id or self._search_imdb_id(title, year, is_movie)
        if not imdb_id:
            return []

        # search
        logger.debug("Using IMDB ID %r", imdb_id)

        query = {
            "FilmName": title,
            "Actors": [],
            "Studios": [],
            "Directors": [],
            "Genres": [],
            "Countries": [],
            "Languages": [],
            "Year": "",
            "Rating": [],
            "Page": 1,
            "SearchType": "0",
            "WithSubsOnly": False,
        }

        if not is_movie:
            query["SearchType"] = "1"

        if year:
            query["Year"] = year

        # get the list of subtitles
        logger.debug("Getting the list of subtitles")

        url = self.server_url + self.search_url
        logger.debug(
            "Calling URL: {} with request: {}".format(url, str({"request": query}))
        )

        r = self.session.post(url, json={"request": query}, timeout=10)
        r.raise_for_status()

        if not r.content:
            return []

        results = self.parse_d_response(r, "Films", [], "Films/Series Information")

        # loop over results
        subtitles = {}
        for result in results:
            # The link may be missing or null; treat that as "no IMDB ID".
            imdb_link = result.get("IMDB_Link") or ""
            imdb_link = imdb_link[0:-1] if imdb_link.endswith("/") else imdb_link

            results_imdb_id = imdb_link.split("/")[-1]

            if results_imdb_id != imdb_id:
                logger.debug(
                    "Subtitles is for IMDB %r but actual IMDB ID is %r",
                    results_imdb_id,
                    imdb_id,
                )
                continue

            language = Language("heb")
            hearing_impaired = False
            ktuvit_id = result["ID"]
            page_link = self.server_url + self.movie_info_url + ktuvit_id

            if is_movie:
                subs = self._search_movie(ktuvit_id)
            else:
                subs = self._search_tvshow(ktuvit_id, season, episode)

            logger.debug('Got {} Subs from Ktuvit'.format(len(subs)))

            for sub in subs:
                subtitle = KtuvitSubtitle(
                    language,
                    hearing_impaired,
                    page_link,
                    title,
                    season,
                    episode,
                    title,
                    imdb_id,
                    ktuvit_id,
                    sub["sub_id"],
                    sub["rls"],
                )
                logger.debug("Found subtitle %r", subtitle)
                # Keyed by subtitle id so a subtitle listed twice is kept once.
                subtitles[sub["sub_id"]] = subtitle

        return list(subtitles.values())

    @staticmethod
    def _parse_subtitle_rows(rows, ktuvit_id, tag_name, id_attribute):
        """Turn subtitle table rows into ``{"id", "rls", "sub_id"}`` dicts.

        The release name is the first line of text in the first cell; the
        subtitle id is read from the `id_attribute` of the first `tag_name`
        element in the sixth cell. Rows lacking either are skipped.
        """
        subs = []
        for row in rows:
            columns = row.find_all("td")
            if len(columns) < 6:
                continue

            id_holder = columns[5].find(tag_name, attrs={id_attribute: True})
            if id_holder is None:
                # The cell exists but carries no subtitle id; skip the row
                # rather than subscripting None.
                continue

            subs.append(
                {
                    "id": ktuvit_id,
                    "rls": columns[0].get_text().strip().split("\n")[0],
                    "sub_id": id_holder[id_attribute],
                }
            )
        return subs

    def _search_tvshow(self, id, season, episode):
        """List the subtitles Ktuvit offers for one episode of a series.

        :param str id: Ktuvit ID of the series.
        :param int season: season number.
        :param int episode: episode number.
        :return: one ``{"id", "rls", "sub_id"}`` dict per subtitle.
        :rtype: list of dict
        """
        url = (
            self.server_url
            + self.episode_info_url
            + "moduleName=SubtitlesList&SeriesID={}&Season={}&Episode={}".format(
                id, season, episode
            )
        )
        r = self.session.get(url, timeout=10)
        r.raise_for_status()

        if len(r.content) < 10:
            logger.debug(
                "Too short content-length in response: [{}]. Treating as No Subtitles Found ".format(
                    str(r.content)
                )
            )
            return []

        sub_list = ParserBeautifulSoup(r.content, ["html.parser"])
        sub_rows = sub_list("tr")

        # A single cell holding the "no subtitles" text means an empty listing.
        first_row = sub_list.find("tr")
        first_cell = first_row.find("td") if first_row is not None else None
        if first_cell is not None and first_cell.get_text() == self.no_subtitle_str:
            logger.debug("No Subtitles Found. URL " + url)
            return []

        return self._parse_subtitle_rows(sub_rows, id, "input", "data-sub-id")

    def _search_movie(self, movie_id):
        """List the subtitles Ktuvit offers for a movie.

        :param str movie_id: Ktuvit ID of the movie.
        :return: one ``{"id", "rls", "sub_id"}`` dict per subtitle.
        :rtype: list of dict
        """
        url = self.server_url + self.movie_info_url + movie_id
        r = self.session.get(url, timeout=10)
        r.raise_for_status()

        if len(r.content) < 10:
            logger.debug(
                "Too short content-length in response: [{}]. Treating as No Subtitles Found ".format(
                    str(r.content)
                )
            )
            return []

        html = ParserBeautifulSoup(r.content, ["html.parser"])
        sub_rows = html.select("table#subtitlesList tbody > tr")

        return self._parse_subtitle_rows(sub_rows, movie_id, "a", "data-subtitle-id")

    def list_subtitles(self, video, languages):
        """Return subtitles for `video` in one of `languages`.

        The video's primary title is tried first, then each alternative title;
        the first title that yields any subtitle wins.

        :param video: the `Episode` or `Movie` to search subtitles for.
        :param languages: languages to keep.
        :return: the matching subtitles, or an empty list.
        :rtype: list of :class:`KtuvitSubtitle`
        """
        season = episode = None
        year = video.year
        filename = video.name

        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
            imdb_id = video.series_imdb_id
        else:
            titles = [video.title] + video.alternative_titles
            imdb_id = video.imdb_id

        for title in titles:
            subtitles = [
                s
                for s in self.query(title, season, episode, year, filename, imdb_id)
                if s.language in languages
            ]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        """Download the content of `subtitle` and store it in `subtitle.content`.

        Ktuvit needs two requests: the first exchanges the film/subtitle ids for
        a download identifier, the second fetches the file. Subtitles that are
        not `KtuvitSubtitle` instances are ignored, and `subtitle.content` is
        left untouched when the provider returns no data.

        :param subtitle: the subtitle to download.
        """
        if not isinstance(subtitle, KtuvitSubtitle):
            return

        download_identifier_request = {
            "FilmID": subtitle.ktuvit_id,
            "SubtitleID": subtitle.subtitle_id,
            "FontSize": 0,
            "FontColor": "",
            "PredefinedLayout": -1,
        }

        logger.debug(
            "Download Identifier Request data: %s",
            json.dumps({"request": download_identifier_request}),
        )

        # request the download identifier
        url = self.server_url + self.request_download_id_url
        r = self.session.post(
            url, json={"request": download_identifier_request}, timeout=10
        )
        r.raise_for_status()

        if not r.content:
            logger.debug(
                "Unable to download subtitle. No download identifier returned from provider"
            )
            return

        download_identifier = self.parse_d_response(r, "DownloadIdentifier")
        if not download_identifier:
            # parse_d_response returns its default (None) when the field is
            # absent; building a URL from that would raise a TypeError.
            logger.debug(
                "Unable to download subtitle. No download identifier returned from provider"
            )
            return

        # download
        url = self.server_url + self.download_link + download_identifier

        r = self.session.get(url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                "Unable to download subtitle. No data returned from provider"
            )
            return

        subtitle.content = fix_line_ending(r.content)

    def parse_d_response(self, response, field, default_value=None, message=None):
        """Extract `field` from a response wrapped as ``{"d": "<json string>"}``.

        :param response: HTTP response whose body is the JSON envelope.
        :param str field: key to read from the decoded inner document.
        :param default_value: value returned when `field` is absent.
        :param str message: description used in error messages (defaults to `field`).
        :return: the value of `field`, or `default_value`.
        :raise json.decoder.JSONDecodeError: if the body is not JSON, the ``d``
            envelope is missing or malformed, or `field` holds an empty value
            that differs from `default_value`.
        """
        message = message if message else field

        try:
            response_content = response.json()
        except ValueError as ex:
            # Depending on the requests version and on whether simplejson is
            # installed, the error may not be the stdlib JSONDecodeError; all
            # variants derive from ValueError. Re-raise as the stdlib type.
            raise json.decoder.JSONDecodeError(
                "Unable to parse JSON returned while getting " + message,
                getattr(ex, "doc", ""),
                getattr(ex, "pos", 0),
            ) from ex

        # kept for manual debugging when needed:
        # logger.debug("Parsing d response_content: " + str(response_content))

        if not isinstance(response_content, dict) or "d" not in response_content:
            raise json.decoder.JSONDecodeError(
                "Incomplete JSON returned while getting " + message,
                str(response_content),
                0,
            )

        try:
            inner = json.loads(response_content["d"])
        except TypeError as ex:
            # "d" was present but not a JSON string (e.g. null).
            raise json.decoder.JSONDecodeError(
                "Incomplete JSON returned while getting " + message,
                str(response_content),
                0,
            ) from ex

        if not isinstance(inner, dict):
            raise json.decoder.JSONDecodeError(
                "Incomplete JSON returned while getting " + message,
                str(inner),
                0,
            )

        value = inner.get(field, default_value)

        # An empty value is only acceptable when it is the caller's own default.
        if not value and value != default_value:
            raise json.decoder.JSONDecodeError("Missing " + message, str(inner), 0)

        return value