2022-10-26 20:53:41 +00:00
|
|
|
from collections import namedtuple
|
|
|
|
from difflib import SequenceMatcher
|
2022-04-18 20:38:23 +00:00
|
|
|
import io
|
|
|
|
import logging
|
|
|
|
import os
|
2022-10-26 20:53:41 +00:00
|
|
|
import re
|
2023-02-16 00:49:56 +00:00
|
|
|
import tempfile
|
|
|
|
from typing import Iterable, Union
|
2022-04-18 20:38:23 +00:00
|
|
|
import zipfile
|
2018-10-31 16:08:29 +00:00
|
|
|
|
2022-04-18 20:38:23 +00:00
|
|
|
from guessit import guessit
|
2023-02-16 00:49:56 +00:00
|
|
|
import pysubs2
|
2022-10-26 20:53:41 +00:00
|
|
|
import rarfile
|
2022-04-18 20:38:23 +00:00
|
|
|
from subliminal.subtitle import fix_line_ending
|
2022-04-19 00:12:07 +00:00
|
|
|
from subliminal_patch.core import Episode
|
|
|
|
from subliminal_patch.subtitle import guess_matches
|
|
|
|
|
2022-04-18 20:38:23 +00:00
|
|
|
from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2022-10-27 03:53:29 +00:00
|
|
|
_MatchingSub = namedtuple("_MatchingSub", ("file", "priority", "context"))
|
2022-04-18 20:38:23 +00:00
|
|
|
|
2022-10-26 20:53:41 +00:00
|
|
|
|
2022-10-26 20:55:00 +00:00
|
|
|
def _get_matching_sub(
    sub_names, forced=False, episode=None, episode_title=None, **kwargs
):
    """Pick the best matching subtitle file name from ``sub_names``.

    Every candidate is recorded as a ``_MatchingSub`` with a priority and the
    highest priority wins; on ties the candidate seen first is kept (the sort
    is stable).

    :param sub_names: iterable of subtitle file names to choose from.
    :param forced: when false, names whose stem ends with "forced" are skipped.
    :param episode: wanted episode number, or None.
    :param episode_title: wanted episode title, or None.
    :param kwargs: accepted for call compatibility; not used here.
    :return: the chosen file name, or None when nothing matched.

    When both ``episode`` and ``episode_title`` are None the input is treated
    as a movie and the first non-skipped name is returned.
    """
    guess_options = {"single_value": True}
    if episode is not None:
        guess_options["type"] = "episode"  # type: ignore

    matching_subs = []

    for sub_name in sub_names:
        if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"):
            logger.debug("Ignoring forced subtitle: %s", sub_name)
            continue

        # If it's a movie then get the first subtitle
        if episode is None and episode_title is None:
            logger.debug("Movie subtitle found: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2, "Movie subtitle"))
            break

        guess = guessit(sub_name, options=guess_options)

        matched_episode_num = guess.get("episode")
        # Log only when guessit did NOT report an episode number.
        if matched_episode_num is None:
            logger.debug("No episode number found in file: %s", sub_name)

        if episode_title is not None:
            from_name = _analize_sub_name(sub_name, episode_title)
            if from_name is not None:
                matching_subs.append(from_name)

        # Require a real episode number to compare against: without this guard
        # a title-only lookup (episode=None) would treat every file that has no
        # episode number as a match, because None == None.
        if episode is not None and episode == matched_episode_num:
            logger.debug("Episode matched from number: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2, "Episode number matched"))

    if not matching_subs:
        logger.debug("Nothing matched")
        return None

    matching_subs.sort(key=lambda x: x.priority, reverse=True)
    logger.debug("Matches: %s", matching_subs)
    return matching_subs[0].file
|
2022-04-18 20:38:23 +00:00
|
|
|
|
|
|
|
|
2022-10-26 20:53:41 +00:00
|
|
|
def _analize_sub_name(sub_name: str, title_):
    """Look for ``title_`` inside the chunks of a subtitle file name.

    The file name without its extension is split on "." and "-"; each chunk
    is stripped and compared with ``title_`` through ``SequenceMatcher``.
    The first chunk whose ratio is above 0.85 decides the result.

    :return: a ``_MatchingSub`` with priority 3 ("Perfect title ratio") or
        priority 1 ("Normal title ratio"), or None when no chunk is similar
        enough.
    """
    stem = os.path.splitext(sub_name)[0]

    for chunk in re.split(r"[.-]", stem):
        candidate = chunk.strip()
        similarity = SequenceMatcher(None, candidate, title_).ratio()

        if similarity <= 0.85:
            continue

        logger.debug(
            "Episode title matched: '%s' -> '%s' [%s]", candidate, sub_name, similarity
        )

        # Avoid false positives with short titles
        near_exact = len(title_) > 4 and similarity >= 0.98
        if near_exact:
            return _MatchingSub(sub_name, 3, "Perfect title ratio")

        return _MatchingSub(sub_name, 1, "Normal title ratio")

    logger.debug("No episode title matched from file: %s", sub_name)
    return None
|
2022-04-18 20:38:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_subtitle_from_archive(
    archive, forced=False, episode=None, get_first_subtitle=False, **kwargs
):
    """Get subtitle from Rarfile/Zipfile object. Return None if nothing is found.

    :param archive: object exposing ``namelist()`` and ``read(name)``.
    :param forced: forwarded to ``_get_matching_sub``.
    :param episode: forwarded to ``_get_matching_sub``.
    :param get_first_subtitle: when true, return the first subtitle member
        without trying to match it.
    :param kwargs: forwarded to ``_get_matching_sub`` (e.g. ``episode_title``).
    :return: the member content passed through ``fix_line_ending``, or None.
    """
    subtitle_extensions = (".srt", ".sub", ".ssa", ".ass")
    # Compare extensions case-insensitively so members such as "NAME.SRT" are
    # not ignored; the original member name is kept for reading.
    subs_in_archive = [
        name
        for name in archive.namelist()
        if name.lower().endswith(subtitle_extensions)
    ]

    if not subs_in_archive:
        logger.info("No subtitles found in archive")
        return None

    logger.debug("Subtitles in archive: %s", subs_in_archive)

    if len(subs_in_archive) == 1 or get_first_subtitle:
        logger.debug("Getting first subtitle in archive: %s", subs_in_archive)
        return fix_line_ending(archive.read(subs_in_archive[0]))

    matching_sub = _get_matching_sub(subs_in_archive, forced, episode, **kwargs)

    if matching_sub is not None:
        logger.info("Using %s from archive", matching_sub)
        return fix_line_ending(archive.read(matching_sub))

    logger.debug("No subtitle found in archive")
    return None
|
|
|
|
|
|
|
|
|
2022-05-22 06:49:54 +00:00
|
|
|
def is_episode(content):
    """Tell whether guessit reports an "episode" key for ``content``.

    guessit is called with the type option forced to "episode".
    """
    guess = guessit(content, {"type": "episode"})
    return "episode" in guess
|
|
|
|
|
|
|
|
|
2022-04-18 20:38:23 +00:00
|
|
|
def get_archive_from_bytes(content: bytes):
    """Get RarFile/ZipFile object from bytes. A ZipFile instance will be returned
    if a subtitle-like stream is found. Return None if something else is found.

    :param content: raw bytes of a rar archive, a zip archive or a subtitle.
    :return: ``rarfile.RarFile``, ``zipfile.ZipFile`` or None. For a bare
        subtitle the returned ZipFile is an in-memory archive holding one
        member named after the temporary ".srt" file used to probe it.
    """
    archive_stream = io.BytesIO(content)

    if rarfile.is_rarfile(archive_stream):
        logger.debug("Identified rar archive")
        return rarfile.RarFile(archive_stream)
    elif zipfile.is_zipfile(archive_stream):
        logger.debug("Identified zip archive")
        return zipfile.ZipFile(archive_stream)

    logger.debug("No compression format found. Trying with subtitle-like files")

    # If the file is a subtitle-like file
    # NOTE(review): the temp file is reopened by name while still open; the
    # tempfile docs say this is platform dependent -- confirm on Windows.
    with tempfile.NamedTemporaryFile(prefix="spsub", suffix=".srt") as tmp_f:
        try:
            tmp_f.write(content)
            # Writes are buffered: flush so that pysubs2 and zipfile, which
            # both open the file by name, see the complete content on disk.
            tmp_f.flush()
            sub = pysubs2.load(tmp_f.name)
        except Exception as error:
            # Best effort: anything pysubs2 cannot parse is simply not a subtitle.
            logger.debug("Couldn't load file: '%s'", error)
        else:
            if sub is not None:
                logger.debug("Identified subtitle file: %s", sub)
                zip_obj = zipfile.ZipFile(io.BytesIO(), mode="x")
                zip_obj.write(tmp_f.name, os.path.basename(tmp_f.name))
                return zip_obj

    logger.debug("Nothing found")
    return None
|
2022-04-19 00:12:07 +00:00
|
|
|
|
|
|
|
|
2023-02-16 00:49:56 +00:00
|
|
|
def update_matches(
    matches,
    video,
    release_info: Union[str, Iterable[str]],
    split="\n",
    **guessit_options
):
    """Update matches set from release info string or Iterable.

    Use the split parameter to iterate over the set delimiter; set None to avoid split.

    :param matches: collection updated in place with ``|=`` and returned.
    :param video: video the guesses are compared against; an ``Episode``
        instance selects the "episode" guessit type, anything else "movie".
    :param release_info: a single release string or an iterable of them.
    :param split: delimiter used to break each release string into pieces;
        None keeps every release string whole.
    :param guessit_options: extra guessit options; "type" is always overwritten.
    :return: the updated ``matches``.
    """
    guessit_options["type"] = "episode" if isinstance(video, Episode) else "movie"

    logger.debug("Guessit options to update matches: %s", guessit_options)

    if isinstance(release_info, str):
        release_info = [release_info]

    for release in release_info:
        # str.split(None) splits on any whitespace, so the documented
        # "None means no split" contract has to be honoured explicitly.
        pieces = [release] if split is None else release.split(split)

        for release_split in pieces:
            logger.debug("Updating matches from release info: %s", release_split)
            matches |= guess_matches(
                video, guessit(release_split.strip(), guessit_options)
            )
            logger.debug("New matches: %s", matches)

    return matches
|