Update provider utils

This commit is contained in:
Vitiko 2022-10-26 16:53:41 -04:00
parent a1594e5a08
commit 23d4e3e006
2 changed files with 85 additions and 20 deletions

View File

@ -1,59 +1,86 @@
from collections import namedtuple
from difflib import SequenceMatcher
import io
import logging
import os
import re
import zipfile
import rarfile
from guessit import guessit
import rarfile
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
from subliminal_patch.subtitle import guess_matches
from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS
logger = logging.getLogger(__name__)
def _get_matching_sub(sub_names, forced=False, episode=None):
matching_sub = None
_MatchingSub = namedtuple("_MatchingSub", ("file", "priority"))
def _get_matching_sub(sub_names, forced=False, episode=None, episode_title=None):
guess_options = {"single_value": True}
if episode is not None:
guess_options["type"] = "episode" # type: ignore
matching_subs = []
for sub_name in sub_names:
if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"):
logger.debug("Ignoring forced subtitle: %s", sub_name)
continue
# If it's a movie then get the first subtitle
if episode is None:
if episode is None and episode_title is None:
logger.debug("Movie subtitle found: %s", sub_name)
matching_sub = sub_name
matching_subs.append(_MatchingSub(sub_name, 2))
break
guess = guessit(sub_name, options=guess_options)
if guess.get("episode") is None:
logger.debug("No episode info found in file: %s", sub_name)
continue
matched_episode_num = guess.get("episode")
if matched_episode_num:
logger.debug("No episode number found in file: %s", sub_name)
if episode == guess["episode"]:
logger.debug("Episode matched: %s", sub_name)
matching_sub = sub_name
break
matched_title = None
if episode_title is not None:
matched_title = _analize_sub_name(sub_name, episode_title)
logger.debug("Ignoring incorrect episode: %s", sub_name)
if episode == matched_episode_num:
logger.debug("Episode matched from number: %s", sub_name)
matching_subs.append(_MatchingSub(sub_name, 2))
elif matched_title:
matching_subs.append(_MatchingSub(sub_name, 1))
else:
logger.debug("Ignoring incorrect episode: '%s'", sub_name)
return matching_sub
if matching_subs:
matching_subs.sort(key=lambda x: x.priority, reverse=True)
logger.debug("Matches: %s", matching_subs)
return matching_subs[0].file
else:
logger.debug("Nothing matched")
return None
def _analize_sub_name(sub_name: str, title_):
titles = re.split(r"[.-]", os.path.splitext(sub_name)[0])
for title in titles:
ratio = SequenceMatcher(None, title, title_).ratio()
if ratio > 0.85:
logger.debug(
"Episode title matched: '%s' -> '%s' [%s]", title, sub_name, ratio
)
return True
logger.debug("No episode title matched from file")
return False
def get_subtitle_from_archive(
archive, forced=False, episode=None, get_first_subtitle=False
archive, forced=False, episode=None, get_first_subtitle=False, **kwargs
):
"Get subtitle from Rarfile/Zipfile object. Return None if nothing is found."
subs_in_archive = [
@ -72,7 +99,7 @@ def get_subtitle_from_archive(
logger.debug("Getting first subtitle in archive: %s", subs_in_archive)
return fix_line_ending(archive.read(subs_in_archive[0]))
matching_sub = _get_matching_sub(subs_in_archive, forced, episode)
matching_sub = _get_matching_sub(subs_in_archive, forced, episode, **kwargs)
if matching_sub is not None:
logger.info("Using %s from archive", matching_sub)

View File

@ -27,6 +27,44 @@ def test_get_matching_sub(sub_names, episode, forced, expected):
assert utils._get_matching_sub(sub_names, forced, episode) == expected
def test_get_matching_sub_complex_season_pack():
    """A season pack with absolute numbering is resolved via the episode title.

    Scenario: Courage the Cowardly Dog S03E17 "Mondo Magic".  No file name
    carries the number 17, so only the title can identify the right file.
    """
    season_pack = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "34. So In Louvre Are We Two. Night Of The Scarecrow.srt",
        "31. Scuba Scuba Doo. Conway The Contaminationist.srt",
        "32. Katz Under The Sea. Curtain Of Cruelty.srt",
        "27. Muriel Meets Her Match. Courage Vs. Mecha-Courage.srt",
        "36. Fishy Business. Angry Nasty People.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]
    expected = "35. Mondo Magic. Watch The Birdies.srt"

    result = utils._get_matching_sub(
        season_pack, False, 17, episode_title="Mondo Magic"
    )

    assert result == expected
def test_get_matching_sub_complex_season_pack_mixed_files():
    """An explicit SxxEyy file is preferred over a title-only match.

    Scenario: Courage the Cowardly Dog S03E17 "Mondo Magic".  The pack holds
    both a file named after the episode title and one named "S03E17.srt";
    the latter is the expected pick.
    """
    mixed_pack = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "S03E15.srt",
        "S03E16.srt",
        "S03E17.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]
    expected = "S03E17.srt"

    result = utils._get_matching_sub(
        mixed_pack, False, 17, episode_title="Mondo Magic"
    )

    assert result == expected
def test_get_subtitle_from_archive_movie(data):
with ZipFile(os.path.join(data, "archive_1.zip")) as zf:
assert utils.get_subtitle_from_archive(zf) is not None