Embedded Subtitles provider: update filters

Avoid the unknown-language fallback if such a language is already present
This commit is contained in:
Vitiko 2023-01-19 00:21:21 -04:00
parent b08c8cf1a8
commit be75d78b93
2 changed files with 48 additions and 15 deletions

View File

@ -6,12 +6,13 @@ import os
import re
import shutil
import tempfile
from typing import List
from babelfish import language_converters
from fese import tags
from fese import container
from fese import FFprobeSubtitleStream
from fese import FFprobeVideoContainer
from fese import tags
from fese.exceptions import InvalidSource
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
@ -119,13 +120,13 @@ class EmbeddedSubtitlesProvider(Provider):
video = _get_memoized_video_container(path)
try:
streams = filter(_check_allowed_codecs, video.get_subtitles())
streams = list(_filter_subtitles(video.get_subtitles()))
except InvalidSource as error:
logger.error("Error trying to get subtitles for %s: %s", video, error)
self._blacklist.add(path)
streams = []
streams = _discard_possible_incomplete_subtitles(list(streams))
streams = _discard_possible_incomplete_subtitles(streams)
if not streams:
logger.debug("No subtitles found for container: %s", video)
@ -207,9 +208,10 @@ class EmbeddedSubtitlesProvider(Provider):
if container.path not in self._cached_paths:
# Extract all subtitle streams to avoid reading the entire
# container over and over
streams = filter(_check_allowed_codecs, container.get_subtitles())
subs = list(_filter_subtitles(container.get_subtitles()))
extracted = container.copy_subtitles(
list(streams),
subs,
self._cache_dir,
timeout=self._timeout,
fallback_to_convert=True,
@ -245,12 +247,20 @@ def _get_memoized_video_container(path: str):
return _MemoizedFFprobeVideoContainer(path)
def _check_allowed_codecs(subtitle: FFprobeSubtitleStream):
    """Return True when the stream's codec is listed in ``_ALLOWED_CODECS``.

    A debug message is logged for every stream that is rejected.
    """
    if subtitle.codec_name not in _ALLOWED_CODECS:
        logger.debug("Unallowed codec: %s", subtitle)
        return False

    return True


def _filter_subtitles(subtitles: List[FFprobeSubtitleStream]):
    """Yield the streams from *subtitles* that should be offered.

    A stream is dropped when:

    * its ``codec_name`` is not in ``_ALLOWED_CODECS``; or
    * its tags report ``language_fallback`` and another stream whose
      language does NOT come from the fallback already carries the same
      language.

    Fallback streams are never compared against each other. Otherwise two
    fallback streams resolving to the same language would discard one
    another and nothing would be yielded for that language.

    :param subtitles: subtitle streams of a single container (any iterable).
    :return: generator over the accepted streams, in their original order.
    """
    # The input is walked twice (once to collect languages, once to filter),
    # so materialise it to stay correct for one-shot iterables as well.
    subtitles = list(subtitles)

    # Languages carried by streams that did not need the fallback. A list and
    # ``in`` keep the comparison on ``==`` without requiring hashability.
    tagged_languages = [
        sub.language for sub in subtitles if sub.tags.language_fallback is not True
    ]

    for subtitle in subtitles:
        if subtitle.codec_name not in _ALLOWED_CODECS:
            logger.debug("Unallowed codec: %s", subtitle)
            continue

        if (
            subtitle.tags.language_fallback is True
            and subtitle.language in tagged_languages
        ):
            logger.debug("Not using language fallback. Language already found")
            continue

        yield subtitle
def _check_hi_fallback(streams, languages):

View File

@ -126,8 +126,8 @@ def fake_streams():
@pytest.mark.parametrize("tags_", [{}, {"language": "und", "title": "Unknown"}])
def test_list_subtitles_unknown_as_english(mocker, tags_):
with EmbeddedSubtitlesProvider(unknown_as_english=True):
def test_list_subtitles_unknown_as_english(mocker, tags_, video_single_language):
with EmbeddedSubtitlesProvider(unknown_as_english=True) as provider:
fake = FFprobeSubtitleStream(
{"index": 3, "codec_name": "subrip", "tags": tags_}
)
@ -135,9 +135,32 @@ def test_list_subtitles_unknown_as_english(mocker, tags_):
"subliminal_patch.providers.embeddedsubtitles._MemoizedFFprobeVideoContainer.get_subtitles",
return_value=[fake],
)
streams = _MemoizedFFprobeVideoContainer.get_subtitles("")
assert len(streams) == 1
assert streams[0].language == Language.fromietf("en")
result = provider.list_subtitles(
video_single_language, {Language.fromalpha2("en")}
)
assert len(result) == 1
def test_list_subtitles_unknown_as_english_w_real_english_subtitles(
    video_single_language, mocker
):
    """Exactly one English result is listed for an "und" + "eng" container.

    The container is faked with two subrip streams: index 3 tagged "und" and
    index 2 tagged "eng". The provider runs with ``unknown_as_english=True``.
    """
    # (stream index, language tag) for each fake stream, in container order.
    stream_specs = ((3, "und"), (2, "eng"))

    with EmbeddedSubtitlesProvider(unknown_as_english=True) as provider:
        # Streams are built inside the provider context, as in the original.
        container_streams = [
            FFprobeSubtitleStream(
                {"index": index, "codec_name": "subrip", "tags": {"language": code}}
            )
            for index, code in stream_specs
        ]

        mocker.patch(
            "subliminal_patch.providers.embeddedsubtitles._MemoizedFFprobeVideoContainer.get_subtitles",
            return_value=container_streams,
        )

        wanted_languages = {Language.fromalpha2("en")}
        found = provider.list_subtitles(video_single_language, wanted_languages)

        assert len(found) == 1
@pytest.mark.parametrize("tags_", [{}, {"language": "und", "title": "Unknown"}])