diff --git a/README.md b/README.md
index 109804d85..b1a1c24bd 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ If you need something that is not already part of Bazarr, feel free to create a
 * Assrt
 * BetaSeries
 * BSplayer
+* Embedded Subtitles
 * GreekSubtitles
 * Hosszupuska
 * LegendasDivx
diff --git a/bazarr/config.py b/bazarr/config.py
index 78af8b39c..c27931344 100644
--- a/bazarr/config.py
+++ b/bazarr/config.py
@@ -190,6 +190,9 @@ defaults = {
         'approved_only': 'False',
         'multithreading': 'True'
     },
+    'embeddedsubtitles': {
+        'include_ass': 'True',
+    },
     'subsync': {
         'use_subsync': 'False',
         'use_subsync_threshold': 'False',
diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py
index bf230931e..9e1d652f6 100644
--- a/bazarr/get_providers.py
+++ b/bazarr/get_providers.py
@@ -13,6 +13,7 @@ import ast
 from get_args import args
 from config import settings, get_array_from
 from event_handler import event_stream
+from utils import get_binary
 from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked
 from subliminal.providers.opensubtitles import DownloadLimitReached
 from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
@@ -198,6 +199,12 @@ def get_providers_auth():
             'email': settings.ktuvit.email,
             'hashed_password': settings.ktuvit.hashed_password,
         },
+        'embeddedsubtitles': {
+            'include_ass': settings.embeddedsubtitles.getboolean('include_ass'),
+            'cache_dir': os.path.join(args.config_dir, "cache"),
+            'ffprobe_path': get_binary("ffprobe"),
+            'ffmpeg_path': get_binary("ffmpeg"),
+        }
     }
 
 
diff --git a/frontend/src/Settings/Providers/list.ts b/frontend/src/Settings/Providers/list.ts
index dc076eb1c..c5defab07 100644
--- a/frontend/src/Settings/Providers/list.ts
+++ b/frontend/src/Settings/Providers/list.ts
@@ -46,6 +46,17 @@ export const ProviderList: Readonly = [
     key: "bsplayer",
     name: "BSplayer",
   },
+  {
+    key: "embeddedsubtitles",
+    name: "Embedded Subtitles",
+    description: "Embedded Subtitles from your Media Files",
+    defaultKey: {
+      include_ass: true,
+    },
+    keyNameOverride: {
+      include_ass: "Convert embedded ASS to SRT",
+    },
+  },
   {
     key: "greeksubs",
     name: "GreekSubs",
diff --git a/libs/subliminal_patch/providers/embeddedsubtitles.py b/libs/subliminal_patch/providers/embeddedsubtitles.py
new file mode 100644
index 000000000..bf89340c5
--- /dev/null
+++ b/libs/subliminal_patch/providers/embeddedsubtitles.py
@@ -0,0 +1,184 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+import shutil
+import tempfile
+
+from babelfish import language_converters
+import fese
+from fese import check_integrity
+from fese import FFprobeSubtitleStream
+from fese import FFprobeVideoContainer
+from fese import to_srt
+from subliminal.subtitle import fix_line_ending
+from subliminal_patch.core import Episode
+from subliminal_patch.core import Movie
+from subliminal_patch.providers import Provider
+from subliminal_patch.subtitle import Subtitle
+from subzero.language import Language
+
+logger = logging.getLogger(__name__)
+
+# Replace Babelfish's Language with Subzero's Language
+fese.Language = Language
+
+
+class EmbeddedSubtitle(Subtitle):
+    """Subtitle backed by a subtitle stream embedded in a media container."""
+
+    provider_name = "embeddedsubtitles"
+    hash_verifiable = False
+
+    def __init__(self, stream, container, matches):
+        super().__init__(stream.language, stream.disposition.hearing_impaired)
+        self.stream: FFprobeSubtitleStream = stream
+        self.container: FFprobeVideoContainer = container
+        self._matches: set = matches
+        self.page_link = self.container.path
+        self.release_info = os.path.basename(self.page_link)
+
+    def get_matches(self, video):
+        """Return the match set; "hash" is always part of it."""
+        if self.hearing_impaired:
+            self._matches.add("hearing_impaired")
+
+        self._matches.add("hash")
+        return self._matches
+
+    @property
+    def id(self):
+        return f"{self.container.path}_{self.stream.index}"
+
+
+class EmbeddedSubtitlesProvider(Provider):
+    """Provider that lists and extracts subtitles embedded in media files."""
+
+    provider_name = "embeddedsubtitles"
+
+    languages = {Language("por", "BR"), Language("spa", "MX")} | {
+        Language.fromalpha2(code) for code in language_converters["alpha2"].codes
+    }
+    languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
+
+    # TODO: add forced support
+    # languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
+
+    video_types = (Episode, Movie)
+    subtitle_class = EmbeddedSubtitle
+
+    def __init__(
+        self, include_ass=True, cache_dir=None, ffprobe_path=None, ffmpeg_path=None
+    ):
+        """Set up the cache location and the fese module-level settings.
+
+        The cache directory is a sub-directory (named after the class) of
+        cache_dir, or of the system temporary directory when cache_dir is
+        not given. The binary paths and log options are assigned on the
+        fese module itself, so they are shared by every provider instance.
+        """
+        self._include_ass = include_ass
+        self._cache_dir = os.path.join(
+            cache_dir or tempfile.gettempdir(), self.__class__.__name__.lower()
+        )
+        # Container path -> paths returned by extract_subtitles()
+        self._cached_paths = {}
+
+        fese.FFPROBE_PATH = ffprobe_path or fese.FFPROBE_PATH
+        fese.FFMPEG_PATH = ffmpeg_path or fese.FFMPEG_PATH
+
+        if logger.getEffectiveLevel() == logging.DEBUG:
+            fese.FF_LOG_LEVEL = "warning"
+        else:
+            # Default is True
+            fese.FFMPEG_STATS = False
+
+    def initialize(self):
+        os.makedirs(self._cache_dir, exist_ok=True)
+
+    def terminate(self):
+        # Remove leftovers
+        shutil.rmtree(self._cache_dir, ignore_errors=True)
+        # The extracted files are gone, so forget their paths as well
+        self._cached_paths.clear()
+
+    def query(self, path: str, languages):
+        """Return the wanted embedded subtitles of the container at path.
+
+        Only subrip and ass streams whose language is in languages and whose
+        disposition is generic or hearing impaired are returned. A source
+        that fese reports as invalid yields an empty list.
+        """
+        video = FFprobeVideoContainer(path)
+
+        try:
+            streams = video.get_subtitles()
+        except fese.InvalidSource as error:
+            logger.error("Error trying to get subtitles for %s: %s", video, error)
+            streams = []
+
+        if not streams:
+            logger.debug("No subtitles found for container: %s", video)
+
+        subtitles = []
+
+        for stream in streams:
+            # Only subrip and ass are currently supported
+            if stream.codec_name not in ("subrip", "ass"):
+                logger.debug("Ignoring codec: %s", stream)
+                continue
+
+            if not self._include_ass and stream.codec_name == "ass":
+                logger.debug("Ignoring ASS subtitle: %s", stream)
+                continue
+
+            if stream.language not in languages:
+                continue
+
+            disposition = stream.disposition
+            if disposition.generic or disposition.hearing_impaired:
+                logger.debug("Appending subtitle: %s", stream)
+                subtitles.append(EmbeddedSubtitle(stream, video, {"hash"}))
+            else:
+                logger.debug("Ignoring unwanted subtitle: %s", stream)
+
+        return subtitles
+
+    def list_subtitles(self, video, languages):
+        return self.query(video.name, languages)
+
+    def download_subtitle(self, subtitle):
+        path = self._get_subtitle_path(subtitle)
+        with open(path, "rb") as sub:
+            content = sub.read()
+            subtitle.content = fix_line_ending(content)
+
+    def _get_subtitle_path(self, subtitle: EmbeddedSubtitle):
+        """Return the path of the extracted (and SRT-converted) subtitle."""
+        container = subtitle.container
+
+        # Check if the container is not already in the instance
+        if container.path not in self._cached_paths:
+            # Extract all subtitle streams to avoid reading the entire
+            # container over and over
+            streams = filter(_check_allowed_extensions, container.get_subtitles())
+            extracted = container.extract_subtitles(list(streams), self._cache_dir)
+            # Add the extracted paths to the container path key
+            self._cached_paths[container.path] = extracted
+
+        cached_path = self._cached_paths[container.path]
+        # Get the subtitle file by index
+        subtitle_path = cached_path[subtitle.stream.index]
+
+        check_integrity(subtitle.stream, subtitle_path)
+
+        # Convert to SRT if the subtitle is ASS
+        new_subtitle_path = to_srt(subtitle_path, remove_source=True)
+        if new_subtitle_path != subtitle_path:
+            cached_path[subtitle.stream.index] = new_subtitle_path
+
+        return new_subtitle_path
+
+
+def _check_allowed_extensions(subtitle: FFprobeSubtitleStream):
+    return subtitle.extension in ("ass", "srt")
diff --git a/tests/subliminal_patch/data/file_1.mkv b/tests/subliminal_patch/data/file_1.mkv
new file mode 100644
index 000000000..61112e0c2
Binary files /dev/null and b/tests/subliminal_patch/data/file_1.mkv differ
diff --git a/tests/subliminal_patch/data/file_2.mkv b/tests/subliminal_patch/data/file_2.mkv
new file mode 100644
index 000000000..eefabffe9
Binary files /dev/null and b/tests/subliminal_patch/data/file_2.mkv differ
diff --git a/tests/subliminal_patch/test_embeddedsubtitles.py b/tests/subliminal_patch/test_embeddedsubtitles.py
new file mode 100644
index 000000000..59ffaf0f2
--- /dev/null
+++ b/tests/subliminal_patch/test_embeddedsubtitles.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+import os
+
+import fese
+import pytest
+from subliminal_patch.core import Episode, Movie
+from subliminal_patch.providers.embeddedsubtitles import EmbeddedSubtitlesProvider
+from subzero.language import Language
+
+_DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")
+
+
+fese.Language = Language
+
+
+@pytest.fixture
+def video_single_language():
+    # Has only ASS streams in english
+    return Episode(
+        os.path.join(_DATA, "file_1.mkv"),
+        "Serial Experiments Lain",
+        1,
+        1,
+        source="Web",
+    )
+
+
+@pytest.fixture
+def video_multiple_languages():
+    # Has SubRip streams in multiple languages
+    return Movie(
+        os.path.join(_DATA, "file_2.mkv"),
+        "I'm No Longer Here",
+        year=2019,
+        source="Web",
+    )
+
+
+@pytest.fixture
+def video_inexistent(tmpdir):
+    return Movie(
+        os.path.join(tmpdir, "inexistent_video.mkv"),
+        "Dummy",
+        year=2021,
+        source="Web",
+    )
+
+
+def test_inexistent_video(video_inexistent):
+    with EmbeddedSubtitlesProvider() as provider:
+        subtitles = provider.list_subtitles(video_inexistent, set())
+        assert len(subtitles) == 0
+
+
+def test_list_subtitles_single_language(video_single_language):
+    with EmbeddedSubtitlesProvider() as provider:
+        subs = provider.list_subtitles(
+            video_single_language, {Language.fromalpha2("en")}
+        )
+
+        for sub in subs:
+            assert sub.language == Language.fromalpha2("en")
+
+
+def test_list_subtitles_multiple_languages(video_multiple_languages):
+    with EmbeddedSubtitlesProvider() as provider:
+        languages = {Language.fromalpha2(code) for code in ("en", "it", "fr", "es")} | {
+            Language("por", "BR")
+        }
+
+        subs = provider.list_subtitles(video_multiple_languages, languages)
+        for expected in languages:
+            assert any(sub.language == expected for sub in subs)
+
+
+def test_list_subtitles_wo_ass(video_single_language):
+    with EmbeddedSubtitlesProvider(include_ass=False) as provider:
+        subs = provider.list_subtitles(
+            video_single_language, {Language.fromalpha2("en")}
+        )
+        assert not subs
+
+
+def test_download_subtitle_multiple(video_multiple_languages):
+    with EmbeddedSubtitlesProvider() as provider:
+        languages = {Language.fromalpha2(code) for code in ("en", "it", "fr")} | {
+            Language("por", "BR")
+        }
+
+        subs = provider.list_subtitles(video_multiple_languages, languages)
+        for sub in subs:
+            provider.download_subtitle(sub)
+            assert sub.content is not None
+
+
+def test_download_subtitle_single(video_single_language):
+    with EmbeddedSubtitlesProvider() as provider:
+        subtitle = provider.list_subtitles(
+            video_single_language, {Language.fromalpha2("en")}
+        )[0]
+        provider.download_subtitle(subtitle)
+        assert subtitle.content is not None
+
+
+def test_download_invalid_subtitle(video_single_language):
+    with EmbeddedSubtitlesProvider() as provider:
+        subtitle = provider.list_subtitles(
+            video_single_language, {Language.fromalpha2("en")}
+        )[0]
+
+        provider._cached_paths[subtitle.container.path] = {
+            subtitle.stream.index: "dummy.srt"
+        }
+        with pytest.raises(fese.InvalidFile):
+            provider.download_subtitle(subtitle)