Merge remote-tracking branch 'origin/development' into development

This commit is contained in:
morpheus65535 2021-12-13 20:05:13 -05:00
commit f38d03ce86
13 changed files with 717 additions and 856 deletions

View File

@ -46,6 +46,7 @@ If you need something that is not already part of Bazarr, feel free to create a
* Assrt
* BetaSeries
* BSplayer
* Embedded Subtitles
* GreekSubtitles
* Hosszupuska
* LegendasDivx

View File

@ -190,6 +190,9 @@ defaults = {
'approved_only': 'False',
'multithreading': 'True'
},
'embeddedsubtitles': {
'include_ass': 'True',
},
'subsync': {
'use_subsync': 'False',
'use_subsync_threshold': 'False',

View File

@ -13,6 +13,7 @@ import ast
from get_args import args
from config import settings, get_array_from
from event_handler import event_stream
from utils import get_binary
from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked
from subliminal.providers.opensubtitles import DownloadLimitReached
from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
@ -198,6 +199,12 @@ def get_providers_auth():
'email': settings.ktuvit.email,
'hashed_password': settings.ktuvit.hashed_password,
},
'embeddedsubtitles': {
'include_ass': settings.embeddedsubtitles.getboolean('include_ass'),
'cache_dir': os.path.join(args.config_dir, "cache"),
'ffprobe_path': get_binary("ffprobe"),
'ffmpeg_path': get_binary("ffmpeg"),
}
}

File diff suppressed because it is too large Load Diff

View File

@ -25,7 +25,6 @@
"bootstrap": "^4",
"lodash": "^4",
"moment": "^2.29.1",
"package.json": "^2.0.1",
"rc-slider": "^9.7",
"react": "^17",
"react-bootstrap": "^1",

View File

@ -46,6 +46,17 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
key: "bsplayer",
name: "BSplayer",
},
{
key: "embeddedsubtitles",
name: "Embedded Subtitles",
description: "Embedded Subtitles from your Media Files",
defaultKey: {
include_ass: true,
},
keyNameOverride: {
include_ass: "Convert embedded ASS to SRT",
},
},
{
key: "greeksubs",
name: "GreekSubs",

401
libs/fese/__init__.py Executable file
View File

@ -0,0 +1,401 @@
# -*- coding: utf-8 -*-
# License: GPL
from __future__ import annotations
import json
import logging
import os
import re
import subprocess
from typing import List, Optional
from babelfish import Language
from babelfish.exceptions import LanguageError
import pysubs2
__version__ = "0.1.0"
logger = logging.getLogger(__name__)
# Paths to executables
# Both can be overridden through the environment; otherwise the bare
# executable name is handed to subprocess as the first command element.
FFPROBE_PATH = os.environ.get("FFPROBE_PATH", "ffprobe")
FFMPEG_PATH = os.environ.get("FFMPEG_PATH", "ffmpeg")
# When true, "-stats" is appended to the ffmpeg extraction command.
FFMPEG_STATS = True
# Value passed to the "-v" option of both ffprobe and ffmpeg.
FF_LOG_LEVEL = "quiet"
class FeseError(Exception):
    """Root of every exception defined by this module."""


class ExtractionError(FeseError):
    """Raised when the ffmpeg extraction subprocess can't be run successfully."""


class InvalidFile(FeseError):
    """Raised when an extracted subtitle file fails the integrity check."""


class InvalidStream(FeseError):
    """Reserved for invalid streams (not raised by the code in this module)."""


class InvalidSource(FeseError):
    """Raised when ffprobe can't provide stream information for a container."""


class ConversionError(FeseError):
    """Raised when a subtitle file can't be converted to SubRip."""


class LanguageNotFound(FeseError):
    """Raised when the language of a subtitle stream can't be determined."""
# Extensions
# File extensions (without the dot) that check_integrity() accepts.
SRT = "srt"
ASS = "ass"
class FFprobeSubtitleDisposition:
    """Boolean flags describing the disposition of a subtitle stream.

    Flags start as ``False`` and are overridden by the matching keys of the
    ``disposition`` mapping reported by FFprobe. A content type can later be
    inferred from the stream title with :meth:`update_from_tags`.
    """

    def __init__(self, data: dict):
        """
        :param data: mapping of flag names to truthy/falsy values; keys that
            don't correspond to an existing attribute are ignored
        """
        for flag in (
            "default",
            "generic",
            "dub",
            "original",
            "comment",
            "lyrics",
            "karaoke",
            "forced",
            "hearing_impaired",
            "visual_impaired",
            "clean_effects",
            "attached_pic",
            "timed_thumbnails",
        ):
            setattr(self, flag, False)

        # Name of the flag detected from the title, if any
        self._content_type = None

        for name, raw in data.items():
            if hasattr(self, name):
                setattr(self, name, bool(raw))

    def update_from_tags(self, tags):
        """Set the content-type flag matching the ``title`` tag.

        The first pattern of ``_content_types`` found in the lowercased title
        wins. Streams without a title, or with a title that matches nothing,
        are marked as generic.

        :param tags: mapping of stream tags
        """
        title = tags.get("title")
        if title is None:
            logger.debug("Title not found. Marking as generic")
            self.generic = True
            return None

        lowered = title.lower()
        matched = next(
            (
                name
                for name, pattern in _content_types.items()
                if pattern.search(lowered) is not None
            ),
            None,
        )
        if matched is None:
            logger.debug("Generic disposition title found: %s", lowered)
            self.generic = True
            return None

        logger.debug("Found %s: %s", matched, lowered)
        self._content_type = matched
        setattr(self, matched, True)
        return None

    @property
    def suffix(self):
        """``-<content type>`` when a content type was detected, else ``""``."""
        return "" if self._content_type is None else f"-{self._content_type}"

    def __str__(self):
        label = self.suffix.lstrip("-").upper()
        return label if label else "GENERIC"
class FFprobeSubtitleStream:
    """Base class for FFprobe (FFmpeg) extractable subtitle streams."""

    def __init__(self, stream: dict):
        """Build the instance from one entry of FFprobe's ``streams`` list.

        :param stream: mapping describing the stream; missing keys fall back
            to neutral defaults
        :raises: LanguageNotFound
        """
        read = stream.get
        self.index = int(read("index", 0))
        self.codec_name = read("codec_name", "Unknown")
        # Unknown codecs use the codec name itself as extension
        self.extension = _subtitle_extensions.get(self.codec_name, self.codec_name)
        self.r_frame_rate = read("r_frame_rate")
        self.avg_frame_rate = read("avg_frame_rate")
        self.time_base = read("time_base")
        self.tags = read("tags", {})
        self.duration = float(read("duration", 0))
        self.start_time = float(read("start_time", 0))
        self.duration_ts = int(read("duration_ts", 0))
        self.start_pts = int(read("start_pts", 0))
        self.disposition = FFprobeSubtitleDisposition(read("disposition", {}))

        if self.tags:
            self.disposition.update_from_tags(self.tags)

        self.language: Language = self._language()

    @property
    def suffix(self):
        """File name suffix: ``<lang>[-<country>][-<content type>].<extension>``."""
        code = self.language.alpha2
        if self.language.country is not None:
            code = f"{code}-{self.language.country}"

        return f"{code}{self.disposition.suffix}.{self.extension}"

    def _language(self) -> Language:
        """Resolve the stream language from its ``language`` and ``title`` tags.

        :raises: LanguageNotFound
        """
        tagged = self.tags.get("language")
        if tagged is None:
            raise LanguageNotFound(f"Couldn't detect language for stream: {self.tags}")

        # Regional variants are only recognizable through the stream title
        extra = _extra_languages.get(tagged)
        if extra is not None:
            title = self.tags.get("title", "n/a").lower()
            if any(possible in title for possible in extra["matches"]):
                logger.debug("Found extra language %s", extra["language_args"])
                return Language(*extra["language_args"])

        try:
            return Language.fromalpha3b(tagged)
        except LanguageError as error:
            logger.debug("Error with '%s' language: %s", tagged, error)

        raise LanguageNotFound(f"Couldn't detect language for stream: {self.tags}")

    def __repr__(self) -> str:
        return f"<{self.codec_name.upper()}: {self.language}@{self.disposition}>"
# Helpers
class FFprobeVideoContainer:
    """A media file whose subtitle streams are listed with ffprobe and
    extracted with ffmpeg."""

    def __init__(self, path: str):
        """
        :param path: path of the media file
        """
        self.path = path

    @property
    def extension(self):
        """Extension of the container path, without the leading dot."""
        return os.path.splitext(self.path)[-1].lstrip(".")

    @staticmethod
    def _numbered_path(sub_path, suffix, number):
        """Return ``sub_path`` with ``-<number>`` inserted before ``.<suffix>``.

        The suffix is removed by slicing. ``str.rstrip`` must not be used for
        this: it strips a *set of characters* and would also eat the end of
        the file name stem.
        """
        tail = f".{suffix}"
        stem = sub_path[: -len(tail)] if sub_path.endswith(tail) else sub_path
        return f"{stem}-{number}{tail}"

    def get_subtitles(self, timeout: int = 600) -> List[FFprobeSubtitleStream]:
        """Factory function to create subtitle instances from FFprobe.

        Streams whose language can't be detected are skipped.

        :param timeout: subprocess timeout in seconds (default: 600)
        :raises: InvalidSource"""
        ff_command = [
            FFPROBE_PATH,
            "-v",
            FF_LOG_LEVEL,
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            self.path,
        ]
        try:
            result = subprocess.run(
                ff_command, stdout=subprocess.PIPE, check=True, timeout=timeout
            )
            streams = json.loads(result.stdout)["streams"]
        except _ffprobe_exceptions as error:
            raise InvalidSource(
                f"{error} trying to get information from {self.path}"
            ) from error  # We want to see the traceback

        subs = []
        for stream in streams:
            if stream.get("codec_type", "n/a") != "subtitle":
                continue
            try:
                subs.append(FFprobeSubtitleStream(stream))
            except LanguageNotFound as error:
                # Deliberately best-effort: leave a trace instead of failing
                logger.debug("Ignoring stream: %s", error)

        if not subs:
            logger.debug("Source doesn't have any subtitle valid streams")
            return []

        logger.debug("Found subtitle streams: %s", subs)
        return subs

    def extract_subtitles(
        self,
        subtitles: List[FFprobeSubtitleStream],
        custom_dir=None,
        overwrite=True,
        timeout=600,
    ):
        """Extracts a list of subtitles. Returns a dictionary of the extracted
        filenames by index.

        All the streams are extracted with a single ffmpeg call.

        :param subtitles: a list of FFprobeSubtitle instances
        :param custom_dir: a custom directory to save the subtitles. Defaults to
        same directory as the media file
        :param overwrite: overwrite files with the same name (default: True)
        :param timeout: subprocess timeout in seconds (default: 600)
        :raises: ExtractionError, OSError
        """
        extract_command = [FFMPEG_PATH, "-v", FF_LOG_LEVEL]
        if FFMPEG_STATS:
            extract_command.append("-stats")
        extract_command.extend(["-y", "-i", self.path])

        if custom_dir is not None:
            # May raise OSError
            os.makedirs(custom_dir, exist_ok=True)

        items = {}
        collected_paths = set()

        for subtitle in subtitles:
            sub_path = f"{os.path.splitext(self.path)[0]}.{subtitle.suffix}"
            if custom_dir is not None:
                sub_path = os.path.join(custom_dir, os.path.basename(sub_path))

            # Streams sharing language and disposition would get the same name
            if sub_path in collected_paths:
                sub_path = self._numbered_path(
                    sub_path, subtitle.suffix, len(collected_paths)
                )

            if not overwrite and os.path.isfile(sub_path):
                logger.debug("Ignoring path (OVERWRITE FALSE): %s", sub_path)
                continue

            extract_command.extend(
                ["-map", f"0:{subtitle.index}", "-c", "copy", sub_path]
            )
            logger.debug("Appending subtitle path: %s", sub_path)

            collected_paths.add(sub_path)
            items[subtitle.index] = sub_path

        if not items:
            logger.debug("No subtitles to extract")
            return {}

        logger.debug("Extracting subtitle with command %s", " ".join(extract_command))

        try:
            subprocess.run(extract_command, timeout=timeout, check=True)
        except (subprocess.SubprocessError, FileNotFoundError) as error:
            raise ExtractionError(f"Error calling ffmpeg: {error}") from error

        # Missing outputs are only reported; the mapping is returned as-is
        for path in items.values():
            if not os.path.isfile(path):
                logger.debug("%s was not extracted", path)

        return items

    def __repr__(self) -> str:
        return f"<FFprobeVideoContainer {self.extension}: {self.path}>"
def check_integrity(
    subtitle: FFprobeSubtitleStream, path: str, sec_offset_threshold=900
):
    """A relative check for the integrity of a file. This can be used to find a failed
    ffmpeg extraction where the final file might not be complete or might be corrupted.
    Currently, only ASS and Subrip are supported.

    The end of the last subtitle event is compared against the duration of the
    stream. The comparison is skipped when the stream doesn't report a duration.

    :param subtitle: FFprobeSubtitle instance
    :param path: the path of the subtitle file (ass or srt)
    :param sec_offset_threshold: the maximum seconds offset to determine if the file is complete
    :raises: InvalidFile
    """
    if subtitle.extension not in (ASS, SRT):
        raise InvalidFile(f"Extension not supported: {subtitle.extension}")

    try:
        sub = pysubs2.load(path)
    except (pysubs2.Pysubs2Error, UnicodeError, OSError) as error:
        # OSError also covers FileNotFoundError
        raise InvalidFile(error) from error

    # A file without events can't be indexed and is useless anyway
    if len(sub) == 0:
        raise InvalidFile(f"No subtitle events found: {path}")

    if not subtitle.duration_ts:
        # duration_ts defaults to 0 when ffprobe doesn't report it; comparing
        # against 0 would reject every subtitle longer than the threshold
        logger.debug("Stream duration unknown. Skipping offset check: %s", path)
        return None

    # NOTE(review): assumes duration_ts is expressed in milliseconds, like
    # pysubs2 event times -- confirm against the stream time_base
    off = abs(int(sub[-1].end) - subtitle.duration_ts)
    if off > abs(sec_offset_threshold) * 1000:
        raise InvalidFile(
            f"The last subtitle timestamp ({sub[-1].end/1000} sec) is {off/1000} sec ahead"
            f" from the subtitle stream total duration ({subtitle.duration} sec)"
        )

    logger.debug("Integrity check passed (%d sec offset)", off / 1000)
    return None
def to_srt(
    source: str, output: Optional[str] = None, remove_source: bool = False
) -> str:
    """Convert a subtitle to SubRip. Currently, only ASS is supported. SubRip
    files will be silently ignored.

    :param source: path of the subtitle file to convert
    :param output: path of the converted file. Defaults to the source path
        with its extension replaced by ``.srt``
    :param remove_source: remove the source file once converted (best-effort)
    :return: the path of the SubRip file
    :raises: ConversionError, OSError"""
    if source.endswith(".srt"):
        return source

    stem, extension = os.path.splitext(source)

    # Exact comparison: `extension not in (".ass")` tested substrings of the
    # string ".ass", so "", ".a" or ".as" were accepted as convertible
    if extension != ".ass":
        raise ConversionError(
            f"No converter found for extension: {extension}"
        ) from None

    output = output or f"{stem}.srt"

    try:
        parsed = pysubs2.load(source)
        parsed.save(output)
    except (pysubs2.Pysubs2Error, UnicodeError) as error:
        raise ConversionError(f"Exception converting {output}: {error}") from error

    logger.debug("Converted: %s", output)

    if remove_source and source != output:
        try:
            os.remove(source)
        except OSError as error:
            # The conversion succeeded; a leftover source isn't fatal
            logger.debug("Can't remove source: %s (%s)", source, error)

    return output
# FFprobe codec name -> file extension. Codecs missing here use their own
# codec name as extension (see FFprobeSubtitleStream.__init__).
_subtitle_extensions = {"subrip": "srt", "ass": "ass"}
# Disposition flag -> pattern searched in the lowercased stream title. The
# first matching entry (in definition order) wins.
_content_types = {
"hearing_impaired": re.compile(r"sdh|hearing impaired"),
"forced": re.compile(r"forced"),
"comment": re.compile(r"comment"),
"visual_impaired": re.compile(r"signs|visual impair"),
"karaoke": re.compile(r"karaoke|songs"),
}
# Exceptions converted to InvalidSource when running FFprobe and parsing
# its JSON output.
_ffprobe_exceptions = (
subprocess.SubprocessError,
json.JSONDecodeError,
FileNotFoundError,
KeyError,
)
# Regional variants keyed by the stream "language" tag. When any of the
# "matches" substrings is found in the lowercased title, the language is
# built from "language_args" instead of the plain tag.
_extra_languages = {
"spa": {
"matches": (
"es-la",
"spa-la",
"spl",
"mx",
"latin",
"mexic",
"argent",
"latam",
),
"language_args": ("spa", "MX"),
},
"por": {
"matches": ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil"),
"language_args": ("por", "BR"),
},
}

View File

@ -50,6 +50,8 @@ class Subtitle(object):
#: Encoding to decode with when accessing :attr:`text`
self.encoding = None
self.release_info = None
# validate the encoding
if encoding:
try:

View File

@ -187,16 +187,15 @@ class SZProviderPool(ProviderPool):
if (str(provider), str(s.id)) in self.blacklist:
logger.info("Skipping blacklisted subtitle: %s", s)
continue
if hasattr(s, 'release_info'):
if s.release_info is not None:
if any([x for x in self.ban_list["must_not_contain"]
if re.search(x, s.release_info, flags=re.IGNORECASE) is not None]):
logger.info("Skipping subtitle because release name contains prohibited string: %s", s)
continue
if any([x for x in self.ban_list["must_contain"]
if re.search(x, s.release_info, flags=re.IGNORECASE) is None]):
logger.info("Skipping subtitle because release name does not contains required string: %s", s)
continue
if s.release_info is not None:
if any([x for x in self.ban_list["must_not_contain"]
if re.search(x, s.release_info, flags=re.IGNORECASE) is not None]):
logger.info("Skipping subtitle because release name contains prohibited string: %s", s)
continue
if any([x for x in self.ban_list["must_contain"]
if re.search(x, s.release_info, flags=re.IGNORECASE) is None]):
logger.info("Skipping subtitle because release name does not contains required string: %s", s)
continue
if s.id in seen:
continue
s.plex_media_fps = float(video.fps) if video.fps else None

View File

@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
import logging
import os
import shutil
import tempfile
from babelfish import language_converters
import fese
from fese import check_integrity
from fese import FFprobeSubtitleStream
from fese import FFprobeVideoContainer
from fese import to_srt
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language
logger = logging.getLogger(__name__)
# Replace Babelfish's Language with Subzero's Language
# fese resolves `Language` as a module global when it builds streams, so
# rebinding it here makes stream languages Subzero instances.
fese.Language = Language
class EmbeddedSubtitle(Subtitle):
    """Subtitle backed by a stream embedded in a media container."""

    provider_name = "embeddedsubtitles"
    hash_verifiable = False

    def __init__(self, stream, container, matches):
        """
        :param stream: the FFprobeSubtitleStream this subtitle comes from
        :param container: the FFprobeVideoContainer holding the stream
        :param matches: initial set of matches, extended by get_matches()
        """
        language = stream.language
        hearing_impaired = stream.disposition.hearing_impaired
        super().__init__(language, hearing_impaired)

        self.stream: FFprobeSubtitleStream = stream
        self.container: FFprobeVideoContainer = container
        self._matches: set = matches

        # The media file itself acts as both link and release name
        self.page_link = self.container.path
        self.release_info = os.path.basename(self.page_link)

    def get_matches(self, video):
        """Return the stored matches, always including ``hash``."""
        found = ["hearing_impaired"] if self.hearing_impaired else []
        found.append("hash")
        self._matches.update(found)
        return self._matches

    @property
    def id(self):
        """Identifier made of the container path and the stream index."""
        path, index = self.container.path, self.stream.index
        return f"{path}_{index}"
class EmbeddedSubtitlesProvider(Provider):
    """Provider serving the subtitle streams embedded in the media files.

    Streams are listed with ffprobe and extracted with ffmpeg (through fese)
    into a cache directory that is removed when the provider terminates.
    """

    provider_name = "embeddedsubtitles"

    languages = {Language("por", "BR"), Language("spa", "MX")} | {
        Language.fromalpha2(code) for code in language_converters["alpha2"].codes
    }
    languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))

    # TODO: add forced support
    # languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))

    video_types = (Episode, Movie)
    subtitle_class = EmbeddedSubtitle

    def __init__(
        self, include_ass=True, cache_dir=None, ffprobe_path=None, ffmpeg_path=None
    ):
        """
        :param include_ass: also list ASS streams (default: True)
        :param cache_dir: parent directory of the extraction cache. Defaults
            to the system temporary directory
        :param ffprobe_path: ffprobe executable. Defaults to fese's setting
        :param ffmpeg_path: ffmpeg executable. Defaults to fese's setting
        """
        self._include_ass = include_ass
        self._cache_dir = os.path.join(
            cache_dir or tempfile.gettempdir(), self.__class__.__name__.lower()
        )
        # Container path -> {stream index: extracted subtitle path}
        self._cached_paths = {}

        # These are module-level settings of fese, shared by every instance
        fese.FFPROBE_PATH = ffprobe_path or fese.FFPROBE_PATH
        fese.FFMPEG_PATH = ffmpeg_path or fese.FFMPEG_PATH

        if logger.getEffectiveLevel() == logging.DEBUG:
            fese.FF_LOG_LEVEL = "warning"
        else:
            # Default is True
            fese.FFMPEG_STATS = False

    def initialize(self):
        """Create the cache directory."""
        os.makedirs(self._cache_dir, exist_ok=True)

    def terminate(self):
        """Remove the cache directory and forget the extracted paths."""
        # Remove leftovers
        shutil.rmtree(self._cache_dir, ignore_errors=True)
        # The cached paths point to the files just removed; keeping them
        # would make a reused instance serve non-existent files
        self._cached_paths.clear()

    def query(self, path: str, languages):
        """List the embedded subtitles of a media file.

        Only subrip and ass streams in one of the wanted languages, with a
        generic or hearing impaired disposition, are returned.

        :param path: path of the media file
        :param languages: wanted languages
        :return: a list of EmbeddedSubtitle instances
        """
        video = FFprobeVideoContainer(path)

        try:
            streams = video.get_subtitles()
        except fese.InvalidSource as error:
            logger.error("Error trying to get subtitles for %s: %s", video, error)
            streams = []

        if not streams:
            logger.debug("No subtitles found for container: %s", video)

        subtitles = []

        for stream in streams:
            # Only subrip and ass are currently supported
            if stream.codec_name not in ("subrip", "ass"):
                logger.debug("Ignoring codec: %s", stream)
                continue

            if not self._include_ass and stream.codec_name == "ass":
                logger.debug("Ignoring ASS subtitle: %s", stream)
                continue

            if stream.language not in languages:
                continue

            disposition = stream.disposition

            if disposition.generic or disposition.hearing_impaired:
                logger.debug("Appending subtitle: %s", stream)
                subtitles.append(EmbeddedSubtitle(stream, video, {"hash"}))
            else:
                logger.debug("Ignoring unwanted subtitle: %s", stream)

        return subtitles

    def list_subtitles(self, video, languages):
        """List the subtitles embedded in ``video.name``."""
        return self.query(video.name, languages)

    def download_subtitle(self, subtitle):
        """Extract the subtitle (if needed) and load its content.

        :raises: fese.InvalidFile when the extracted file fails the
            integrity check
        """
        path = self._get_subtitle_path(subtitle)
        with open(path, "rb") as sub:
            content = sub.read()
            subtitle.content = fix_line_ending(content)

    def _get_subtitle_path(self, subtitle: EmbeddedSubtitle):
        """Return the path of the extracted (and SRT-converted) subtitle."""
        container = subtitle.container

        # Check if the container is not already in the instance
        if container.path not in self._cached_paths:
            # Extract all subtitle streams to avoid reading the entire
            # container over and over
            streams = filter(_check_allowed_extensions, container.get_subtitles())
            extracted = container.extract_subtitles(list(streams), self._cache_dir)
            # Add the extracted paths to the container path key
            self._cached_paths[container.path] = extracted

        cached_path = self._cached_paths[container.path]
        # Get the subtitle file by index
        subtitle_path = cached_path[subtitle.stream.index]

        check_integrity(subtitle.stream, subtitle_path)

        # Convert to SRT if the subtitle is ASS
        new_subtitle_path = to_srt(subtitle_path, remove_source=True)
        if new_subtitle_path != subtitle_path:
            # The ASS source was removed; serve the SRT from now on
            cached_path[subtitle.stream.index] = new_subtitle_path

        return new_subtitle_path
def _check_allowed_extensions(subtitle: FFprobeSubtitleStream):
return subtitle.extension in ("ass", "srt")

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
import os
import fese
import pytest
from subliminal_patch.core import Episode, Movie
from subliminal_patch.providers.embeddedsubtitles import EmbeddedSubtitlesProvider
from subzero.language import Language
# Directory holding the media files used by the fixtures below
_DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")
# Have fese build stream languages with Subzero's Language class
fese.Language = Language
@pytest.fixture
def video_single_language():
    """Episode backed by ``file_1.mkv``."""
    # Has only ASS streams in english
    media_path = os.path.join(_DATA, "file_1.mkv")
    return Episode(media_path, "Serial Experiments Lain", 1, 1, source="Web")
@pytest.fixture
def video_multiple_languages():
    """Movie backed by ``file_2.mkv``."""
    # Has SubRip streams in multiple languages
    media_path = os.path.join(_DATA, "file_2.mkv")
    return Movie(media_path, "I'm No Longer Here", year=2019, source="Web")
@pytest.fixture
def video_inexistent(tmpdir):
    """Movie whose path points inside ``tmpdir`` to a file that is never created."""
    media_path = os.path.join(tmpdir, "inexistent_video.mkv")
    return Movie(media_path, "Dummy", year=2021, source="Web")
def test_inexistent_video(video_inexistent):
    """Listing subtitles for a missing file yields nothing instead of raising."""
    with EmbeddedSubtitlesProvider() as provider:
        # Languages are handled as sets everywhere else; `{}` is a dict
        subtitles = provider.list_subtitles(video_inexistent, set())
        assert len(subtitles) == 0
def test_list_subtitles_single_language(video_single_language):
    """Every subtitle listed for the single-language file is in english."""
    english = Language.fromalpha2("en")
    with EmbeddedSubtitlesProvider() as provider:
        subs = provider.list_subtitles(video_single_language, {english})

        # Without this the loop below passes vacuously on an empty result
        assert subs
        for sub in subs:
            assert sub.language == english
def test_list_subtitles_multiple_languages(video_multiple_languages):
    """Each requested language is present among the listed subtitles."""
    with EmbeddedSubtitlesProvider() as provider:
        languages = {Language("por", "BR")}
        languages.update(
            Language.fromalpha2(code) for code in ("en", "it", "fr", "es")
        )

        subs = provider.list_subtitles(video_multiple_languages, languages)
        for expected in languages:
            assert any(sub.language == expected for sub in subs)
def test_list_subtitles_wo_ass(video_single_language):
    """Nothing is listed for the ASS-only file when ASS is excluded."""
    wanted = {Language.fromalpha2("en")}
    with EmbeddedSubtitlesProvider(include_ass=False) as provider:
        subs = provider.list_subtitles(video_single_language, wanted)
        assert not subs
def test_download_subtitle_multiple(video_multiple_languages):
    """Every listed subtitle of the multi-language file can be downloaded."""
    with EmbeddedSubtitlesProvider() as provider:
        languages = {Language.fromalpha2(code) for code in ("en", "it", "fr")} | {
            Language("por", "BR")
        }

        subs = provider.list_subtitles(video_multiple_languages, languages)

        # Without this the loop below passes vacuously on an empty result
        assert subs
        for sub in subs:
            provider.download_subtitle(sub)
            assert sub.content is not None
def test_download_subtitle_single(video_single_language):
    """The first english subtitle of the single-language file has content."""
    wanted = {Language.fromalpha2("en")}
    with EmbeddedSubtitlesProvider() as provider:
        listed = provider.list_subtitles(video_single_language, wanted)
        subtitle = listed[0]

        provider.download_subtitle(subtitle)
        assert subtitle.content is not None
def test_download_invalid_subtitle(video_single_language):
    """Downloading fails with InvalidFile when the cached path is bogus."""
    wanted = {Language.fromalpha2("en")}
    with EmbeddedSubtitlesProvider() as provider:
        listed = provider.list_subtitles(video_single_language, wanted)
        subtitle = listed[0]

        # Pre-seed the cache so the provider skips the real extraction
        bogus_paths = {subtitle.stream.index: "dummy.srt"}
        provider._cached_paths[subtitle.container.path] = bogus_paths

        with pytest.raises(fese.InvalidFile):
            provider.download_subtitle(subtitle)