Update provider utils

This commit is contained in:
Vitiko 2022-10-26 16:53:41 -04:00
parent a1594e5a08
commit 23d4e3e006
2 changed files with 85 additions and 20 deletions

View File

@ -1,59 +1,86 @@
from collections import namedtuple
from difflib import SequenceMatcher
import io import io
import logging import logging
import os import os
import re
import zipfile import zipfile
import rarfile
from guessit import guessit from guessit import guessit
import rarfile
from subliminal.subtitle import fix_line_ending from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode from subliminal_patch.core import Episode
from subliminal_patch.subtitle import guess_matches from subliminal_patch.subtitle import guess_matches
from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# A candidate file together with how strongly it matched (higher is better).
_MatchingSub = namedtuple("_MatchingSub", ("file", "priority"))


def _get_matching_sub(sub_names, forced=False, episode=None, episode_title=None):
    """Return the best matching subtitle file name from *sub_names*.

    :param sub_names: iterable of file names to choose from.
    :param forced: when false, names whose stem ends with "forced" are skipped.
    :param episode: wanted episode number, or None.
    :param episode_title: wanted episode title, or None.
    :return: the chosen file name, or None if nothing matched.

    When neither *episode* nor *episode_title* is given the request is treated
    as a movie and the first non-skipped file wins. Otherwise a file whose
    guessed episode number equals *episode* gets priority 2 and a file whose
    name matches *episode_title* gets priority 1; the first file with the
    highest priority is returned.
    """
    guess_options = {"single_value": True}
    if episode is not None:
        guess_options["type"] = "episode"  # type: ignore

    matching_subs = []

    for sub_name in sub_names:
        if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"):
            logger.debug("Ignoring forced subtitle: %s", sub_name)
            continue

        # If it's a movie then get the first subtitle
        if episode is None and episode_title is None:
            logger.debug("Movie subtitle found: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2))
            break

        guess = guessit(sub_name, options=guess_options)

        matched_episode_num = guess.get("episode")
        # Compare against None explicitly: the message is about a *missing*
        # number, and an episode numbered 0 is still a number.
        if matched_episode_num is None:
            logger.debug("No episode number found in file: %s", sub_name)

        matched_title = None
        if episode_title is not None:
            matched_title = _analize_sub_name(sub_name, episode_title)

        # Require a wanted episode number; otherwise a title-only lookup would
        # treat every file without a number as a number match (None == None).
        if episode is not None and episode == matched_episode_num:
            logger.debug("Episode matched from number: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2))
        elif matched_title:
            matching_subs.append(_MatchingSub(sub_name, 1))
        else:
            logger.debug("Ignoring incorrect episode: '%s'", sub_name)

    if not matching_subs:
        logger.debug("Nothing matched")
        return None

    # list.sort is stable, so among equal priorities the archive order is kept.
    matching_subs.sort(key=lambda x: x.priority, reverse=True)
    logger.debug("Matches: %s", matching_subs)
    return matching_subs[0].file
def _analize_sub_name(sub_name: str, title_: str) -> bool:
    """Tell whether any part of *sub_name* closely matches *title_*.

    The file name (without its extension) is split on "." and "-" and each
    fragment is compared with *title_* using difflib's similarity ratio. The
    comparison ignores letter case and surrounding whitespace.

    :param sub_name: subtitle file name to inspect.
    :param title_: episode title to look for.
    :return: True if a fragment scores above 0.85, otherwise False.
    """
    wanted = title_.strip().casefold()

    # An empty title would score 1.0 against an empty fragment; never match it.
    if wanted:
        # SequenceMatcher caches details about its second sequence, so the
        # constant title goes there and only the first sequence is swapped.
        matcher = SequenceMatcher(None, "", wanted)

        for title in re.split(r"[.-]", os.path.splitext(sub_name)[0]):
            candidate = title.strip().casefold()
            if not candidate:
                continue

            matcher.set_seq1(candidate)
            ratio = matcher.ratio()
            if ratio > 0.85:
                logger.debug(
                    "Episode title matched: '%s' -> '%s' [%s]", title, sub_name, ratio
                )
                return True

    logger.debug("No episode title matched from file")
    return False
def get_subtitle_from_archive( def get_subtitle_from_archive(
archive, forced=False, episode=None, get_first_subtitle=False archive, forced=False, episode=None, get_first_subtitle=False, **kwargs
): ):
"Get subtitle from Rarfile/Zipfile object. Return None if nothing is found." "Get subtitle from Rarfile/Zipfile object. Return None if nothing is found."
subs_in_archive = [ subs_in_archive = [
@ -72,7 +99,7 @@ def get_subtitle_from_archive(
logger.debug("Getting first subtitle in archive: %s", subs_in_archive) logger.debug("Getting first subtitle in archive: %s", subs_in_archive)
return fix_line_ending(archive.read(subs_in_archive[0])) return fix_line_ending(archive.read(subs_in_archive[0]))
matching_sub = _get_matching_sub(subs_in_archive, forced, episode) matching_sub = _get_matching_sub(subs_in_archive, forced, episode, **kwargs)
if matching_sub is not None: if matching_sub is not None:
logger.info("Using %s from archive", matching_sub) logger.info("Using %s from archive", matching_sub)

View File

@ -27,6 +27,44 @@ def test_get_matching_sub(sub_names, episode, forced, expected):
assert utils._get_matching_sub(sub_names, forced, episode) == expected assert utils._get_matching_sub(sub_names, forced, episode) == expected
def test_get_matching_sub_complex_season_pack():
    """A title-named season pack resolves to the file carrying the episode title."""
    season_pack = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "34. So In Louvre Are We Two. Night Of The Scarecrow.srt",
        "31. Scuba Scuba Doo. Conway The Contaminationist.srt",
        "32. Katz Under The Sea. Curtain Of Cruelty.srt",
        "27. Muriel Meets Her Match. Courage Vs. Mecha-Courage.srt",
        "36. Fishy Business. Angry Nasty People.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]
    expected = "35. Mondo Magic. Watch The Birdies.srt"

    # Courage the Cowardly Dog S03E17 "Mondo Magic"
    result = utils._get_matching_sub(
        season_pack, forced=False, episode=17, episode_title="Mondo Magic"
    )

    assert result == expected
def test_get_matching_sub_complex_season_pack_mixed_files():
    """With both numbered and title-named files, the numbered file is chosen."""
    wanted_episode = 17
    wanted_title = "Mondo Magic"
    archive_names = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "S03E15.srt",
        "S03E16.srt",
        "S03E17.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]

    # Courage the Cowardly Dog S03E17 "Mondo Magic"
    chosen = utils._get_matching_sub(
        archive_names, False, wanted_episode, episode_title=wanted_title
    )

    assert chosen == "S03E17.srt"
def test_get_subtitle_from_archive_movie(data): def test_get_subtitle_from_archive_movie(data):
with ZipFile(os.path.join(data, "archive_1.zip")) as zf: with ZipFile(os.path.join(data, "archive_1.zip")) as zf:
assert utils.get_subtitle_from_archive(zf) is not None assert utils.get_subtitle_from_archive(zf) is not None