Merge remote-tracking branch 'origin/development' into development

This commit is contained in:
morpheus65535 2023-02-16 17:13:34 -05:00
commit 2ae4738a14
4 changed files with 83 additions and 47 deletions

View File

@ -1,32 +1,34 @@
# coding=utf-8
import io
import logging
from random import randint
import re
import time
import urllib.parse
from babelfish import language_converters
from subzero.language import Language
from bs4.element import NavigableString
from bs4.element import Tag
from guessit import guessit
from requests import Session
from requests.exceptions import JSONDecodeError
import urllib.parse
from random import randint
from subliminal.subtitle import fix_line_ending
from subliminal.providers import ParserBeautifulSoup
from subliminal.score import get_equivalent_release_groups
from subliminal.utils import sanitize
from subliminal.utils import sanitize_release_group
from subliminal.video import Episode
from subliminal.video import Movie
from subliminal_patch.exceptions import APIThrottled
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal.providers import ParserBeautifulSoup
from bs4.element import Tag, NavigableString
from subliminal.score import get_equivalent_release_groups
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.exceptions import APIThrottled
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode, Movie
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile
from subliminal_patch.utils import sanitize, fix_inconsistent_naming
from guessit import guessit
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import fix_inconsistent_naming
from subliminal_patch.utils import sanitize
from subzero.language import Language
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
from .utils import get_archive_from_bytes
from .utils import get_subtitle_from_archive
from .utils import update_matches
logger = logging.getLogger(__name__)
@ -78,7 +80,7 @@ class SuperSubtitlesSubtitle(Subtitle):
self.season = season
self.episode = episode
self.version = version
self.releases = releases
self.releases = releases or []
self.year = year
self.uploader = uploader
if year:
@ -91,7 +93,7 @@ class SuperSubtitlesSubtitle(Subtitle):
self.asked_for_episode = asked_for_episode
self.imdb_id = imdb_id
self.is_pack = True
self.matches = None
self.matches = set()
def numeric_id(self):
return self.subtitle_id
@ -109,8 +111,8 @@ class SuperSubtitlesSubtitle(Subtitle):
return str(self.subtitle_id)
def get_matches(self, video):
type_ = "movie" if isinstance(video, Movie) else "episode"
matches = guess_matches(video, guessit(self.release_info, {"type": type_}))
matches = set()
update_matches(matches, video, self.releases)
# episode
if isinstance(video, Episode):
@ -543,21 +545,12 @@ class SuperSubtitlesProvider(Provider, ProviderSubtitleArchiveMixin):
return subtitles
def download_subtitle(self, subtitle):
    """Download the payload behind ``subtitle.page_link`` and fill ``subtitle.content``.

    The response body is handed to ``get_archive_from_bytes``; the resulting
    archive is then passed to ``get_subtitle_from_archive`` and whatever that
    returns is stored on ``subtitle.content``.

    :param subtitle: subtitle object providing ``subtitle_id``, ``page_link``
        and ``episode``; its ``content`` attribute is assigned here.
    :raises APIThrottled: when no archive could be built from the response.
    """
    logger.info('Downloading subtitle %r', subtitle.subtitle_id)
    r = self.session.get(subtitle.page_link, timeout=10)
    # Surface HTTP errors before trying to interpret the body
    r.raise_for_status()

    archive = get_archive_from_bytes(r.content)
    if archive is None:
        raise APIThrottled(f"Invalid archive from {subtitle.page_link}")

    # A falsy episode value is forwarded as None rather than as-is
    subtitle.content = get_subtitle_from_archive(archive, episode=subtitle.episode or None)

View File

@ -4,9 +4,12 @@ import io
import logging
import os
import re
import tempfile
from typing import Iterable, Union
import zipfile
from guessit import guessit
import pysubs2
import rarfile
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
@ -119,10 +122,10 @@ def is_episode(content):
def get_archive_from_bytes(content: bytes):
    """Get a RarFile/ZipFile object from bytes.

    If the content is neither a rar nor a zip archive but pysubs2 can load it,
    the content is wrapped in a new in-memory ZipFile holding a single member
    and that ZipFile is returned instead.

    :param content: raw bytes to inspect.
    :return: a ``rarfile.RarFile``, a ``zipfile.ZipFile``, or None if nothing
        usable was found.
    """
    archive_stream = io.BytesIO(content)

    if rarfile.is_rarfile(archive_stream):
        logger.debug("Identified rar archive")
        return rarfile.RarFile(archive_stream)

    if zipfile.is_zipfile(archive_stream):
        logger.debug("Identified zip archive")
        return zipfile.ZipFile(archive_stream)

    logger.debug("No compression format found. Trying with subtitle-like files")

    # If the file is a subtitle-like file
    # NOTE(review): the temp file is reopened by name while still open here;
    # the tempfile docs say this is not possible on Windows - confirm target platforms.
    with tempfile.NamedTemporaryFile(prefix="spsub", suffix=".srt") as tmp_f:
        try:
            tmp_f.write(content)
            # The file is read back through its name below, so buffered bytes
            # must reach the disk first.
            tmp_f.flush()
            sub = pysubs2.load(tmp_f.name)
        except Exception as error:
            # Best effort: anything pysubs2 cannot load is treated as "not a subtitle"
            logger.debug("Couldn't load file: '%s'", error)
        else:
            if sub is not None:
                logger.debug("Identified subtitle file: %s", sub)
                zip_obj = zipfile.ZipFile(io.BytesIO(), mode="x")
                zip_obj.write(tmp_f.name, os.path.basename(tmp_f.name))
                return zip_obj

    logger.debug("Nothing found")
    return None
def update_matches(
    matches,
    video,
    release_info: Union[str, Iterable[str]],
    split="\n",
    **guessit_options
):
    """Update a matches set from one release info string or an iterable of them.

    :param matches: set of matches; it is updated in place and also returned.
    :param video: video to compare against. An ``Episode`` instance selects the
        guessit type "episode"; anything else selects "movie".
    :param release_info: a single release string or an iterable of strings.
    :param split: delimiter used to break every release string into pieces
        that are guessed separately. Set None to guess each string as a whole.
    :param guessit_options: extra options forwarded to guessit; the ``type``
        key is always overwritten.
    :return: the ``matches`` set that was passed in.
    """
    guessit_options["type"] = "episode" if isinstance(video, Episode) else "movie"

    logger.debug("Guessit options to update matches: %s", guessit_options)

    # Wrap a bare string so the loop below sees whole release strings instead
    # of iterating the string character by character.
    if isinstance(release_info, str):
        release_info = [release_info]

    for release in release_info:
        # str.split(None) would split on whitespace, which is not "no split"
        pieces = [release] if split is None else release.split(split)
        for release_split in pieces:
            logger.debug("Updating matches from release info: %s", release_split)
            matches |= guess_matches(
                video, guessit(release_split.strip(), guessit_options)
            )
            logger.debug("New matches: %s", matches)

    return matches

View File

@ -44,7 +44,7 @@ def test_list_episode_subtitles(episode):
def test_download_episode_subtitle(episode):
subtitle = SuperSubtitlesSubtitle(
Language.fromalpha2("en"),
"https://www.feliratok.info/index.php?action=letolt&felirat=1643361676",
"https://www.feliratok.eu/index.php?action=letolt&felirat=1643361676",
1643361676,
"All of us are dead",
1,
@ -82,7 +82,7 @@ def test_download_movie_subtitle(movies):
subtitle = SuperSubtitlesSubtitle(
Language.fromalpha2("en"),
"https://www.feliratok.info/index.php?action=letolt&felirat=1634579718",
"https://www.feliratok.eu/index.php?action=letolt&felirat=1634579718",
1634579718,
"Dune",
0,

View File

@ -122,6 +122,14 @@ def test_update_matches(movies):
assert "source" in matches
def test_update_matches_iterable(movies):
    """update_matches must accept a list of release strings, not only a str."""
    releases = ["Subs for dune 2021 bluray x264", "Dune webrip x264"]
    found = set()
    utils.update_matches(found, movies["dune"], releases)
    assert "source" in found
@pytest.mark.parametrize(
"content,expected", [("the.wire.s01e01", True), ("taxi driver 1976", False)]
)