From be75d78b939ca8b798c50d9c95b5d8b98ada984d Mon Sep 17 00:00:00 2001 From: Vitiko Date: Thu, 19 Jan 2023 00:21:21 -0400 Subject: [PATCH] Embedded Subtitles provider: update filters Avoid unknown language fallback if such language is already present --- .../providers/embeddedsubtitles.py | 30 +++++++++++------ .../test_embeddedsubtitles.py | 33 ++++++++++++++++--- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/libs/subliminal_patch/providers/embeddedsubtitles.py b/libs/subliminal_patch/providers/embeddedsubtitles.py index cc322ba43..83a6f10d8 100644 --- a/libs/subliminal_patch/providers/embeddedsubtitles.py +++ b/libs/subliminal_patch/providers/embeddedsubtitles.py @@ -6,12 +6,13 @@ import os import re import shutil import tempfile +from typing import List from babelfish import language_converters -from fese import tags from fese import container from fese import FFprobeSubtitleStream from fese import FFprobeVideoContainer +from fese import tags from fese.exceptions import InvalidSource from subliminal.subtitle import fix_line_ending from subliminal_patch.core import Episode @@ -119,13 +120,13 @@ class EmbeddedSubtitlesProvider(Provider): video = _get_memoized_video_container(path) try: - streams = filter(_check_allowed_codecs, video.get_subtitles()) + streams = list(_filter_subtitles(video.get_subtitles())) except InvalidSource as error: logger.error("Error trying to get subtitles for %s: %s", video, error) self._blacklist.add(path) streams = [] - streams = _discard_possible_incomplete_subtitles(list(streams)) + streams = _discard_possible_incomplete_subtitles(streams) if not streams: logger.debug("No subtitles found for container: %s", video) @@ -207,9 +208,10 @@ class EmbeddedSubtitlesProvider(Provider): if container.path not in self._cached_paths: # Extract all subittle streams to avoid reading the entire # container over and over - streams = filter(_check_allowed_codecs, container.get_subtitles()) + subs = 
list(_filter_subtitles(container.get_subtitles())) + extracted = container.copy_subtitles( - list(streams), + subs, self._cache_dir, timeout=self._timeout, fallback_to_convert=True, @@ -245,12 +247,20 @@ def _get_memoized_video_container(path: str): return _MemoizedFFprobeVideoContainer(path) -def _check_allowed_codecs(subtitle: FFprobeSubtitleStream): - if subtitle.codec_name not in _ALLOWED_CODECS: - logger.debug("Unallowed codec: %s", subtitle) - return False +def _filter_subtitles(subtitles: List[FFprobeSubtitleStream]): + for subtitle in subtitles: + if subtitle.codec_name not in _ALLOWED_CODECS: + logger.debug("Unallowed codec: %s", subtitle) + continue - return True + if subtitle.tags.language_fallback is True and any( + (subtitle.language == sub.language) and (subtitle.index != sub.index) + for sub in subtitles + ): + logger.debug("Not using language fallback. Language already found") + continue + + yield subtitle def _check_hi_fallback(streams, languages): diff --git a/tests/subliminal_patch/test_embeddedsubtitles.py b/tests/subliminal_patch/test_embeddedsubtitles.py index ab0dc1c41..2c04aa377 100644 --- a/tests/subliminal_patch/test_embeddedsubtitles.py +++ b/tests/subliminal_patch/test_embeddedsubtitles.py @@ -126,8 +126,8 @@ def fake_streams(): @pytest.mark.parametrize("tags_", [{}, {"language": "und", "title": "Unknown"}]) -def test_list_subtitles_unknown_as_english(mocker, tags_): - with EmbeddedSubtitlesProvider(unknown_as_english=True): +def test_list_subtitles_unknown_as_english(mocker, tags_, video_single_language): + with EmbeddedSubtitlesProvider(unknown_as_english=True) as provider: fake = FFprobeSubtitleStream( {"index": 3, "codec_name": "subrip", "tags": tags_} ) @@ -135,9 +135,32 @@ def test_list_subtitles_unknown_as_english(mocker, tags_): "subliminal_patch.providers.embeddedsubtitles._MemoizedFFprobeVideoContainer.get_subtitles", return_value=[fake], ) - streams = _MemoizedFFprobeVideoContainer.get_subtitles("") - assert len(streams) == 1 
- assert streams[0].language == Language.fromietf("en") + result = provider.list_subtitles( + video_single_language, {Language.fromalpha2("en")} + ) + assert len(result) == 1 + + +def test_list_subtitles_unknown_as_english_w_real_english_subtitles( + video_single_language, mocker +): + with EmbeddedSubtitlesProvider(unknown_as_english=True) as provider: + fakes = [ + FFprobeSubtitleStream( + {"index": 3, "codec_name": "subrip", "tags": {"language": "und"}} + ), + FFprobeSubtitleStream( + {"index": 2, "codec_name": "subrip", "tags": {"language": "eng"}} + ), + ] + mocker.patch( + "subliminal_patch.providers.embeddedsubtitles._MemoizedFFprobeVideoContainer.get_subtitles", + return_value=fakes, + ) + result = provider.list_subtitles( + video_single_language, {Language.fromalpha2("en")} + ) + assert len(result) == 1 @pytest.mark.parametrize("tags_", [{}, {"language": "und", "title": "Unknown"}])