def _discard_possible_incomplete_subtitles(streams):
    """Filter out subtitle streams that look truncated.

    The largest ``number_of_frames`` found among ``streams`` is taken as the
    reference; every stream whose own count is below half of that reference
    (integer division) is dropped.

    :param streams: subtitle streams exposing a ``number_of_frames`` attribute
    :return: an empty list when ``streams`` is empty; ``streams`` itself,
        unfiltered, when the largest frame count is falsy; otherwise a new
        list with the surviving streams in their original order
    """
    try:
        most_frames = max(candidate.number_of_frames for candidate in streams)
    except ValueError:
        # max() raises ValueError on an empty iterable: nothing to inspect.
        return []

    if not most_frames:
        # Some ffprobe streams carry no number_of_frames tag, so without a
        # usable reference count we assume nothing has to be discarded.
        return streams

    logger.debug("Checking possible incomplete subtitles (max frames: %d)", most_frames)

    # Computed once: anything under half of the reference count is suspicious
    # (e.g. 500 frames against a maximum of 1200).
    minimum_frames = most_frames // 2

    kept_streams = []
    for candidate in streams:
        frames = candidate.number_of_frames
        if frames < minimum_frames:
            logger.debug(
                "Possible bad subtitle found: %s (%s frames - %s frames)",
                candidate,
                frames,
                most_frames,
            )
        else:
            kept_streams.append(candidate)

    return kept_streams
@pytest.mark.parametrize(
    "number_of_frames,expected_len",
    [
        ((34, 811), 1),
        ((0, 0), 2),
        ((811, 34), 1),
        ((900, 1000), 2),
        ((0, 900), 1),
    ],
)
def test_discard_possible_incomplete_subtitles(number_of_frames, expected_len):
    """Check how many of two subtitle streams survive the frame-count filter.

    ``number_of_frames`` holds the ``NUMBER_OF_FRAMES`` tag value for the
    first and second stream; ``expected_len`` is the number of streams that
    ``_discard_possible_incomplete_subtitles`` is expected to return.
    """
    first_frames = number_of_frames[0]
    second_frames = number_of_frames[1]

    # Two otherwise identical English SubRip streams; only the stream index
    # and the frame-count tag differ.
    streams = [
        FFprobeSubtitleStream(
            {
                "index": stream_index,
                "codec_name": "subrip",
                "codec_long_name": "SubRip subtitle",
                "disposition": {},
                "tags": {"language": "eng", "NUMBER_OF_FRAMES": frames},
            }
        )
        for stream_index, frames in ((1, first_frames), (2, second_frames))
    ]

    remaining = _discard_possible_incomplete_subtitles(streams)
    assert len(remaining) == expected_len