2021-01-19 04:49:51 +00:00
|
|
|
# coding=utf-8
|
|
|
|
|
2019-04-23 12:35:50 +00:00
|
|
|
import logging
|
2021-05-06 03:07:23 +00:00
|
|
|
import pickle
|
2022-05-01 12:00:20 +00:00
|
|
|
|
2022-06-09 01:34:21 +00:00
|
|
|
from knowit.api import know
|
2021-05-06 03:07:23 +00:00
|
|
|
|
2022-05-01 12:00:20 +00:00
|
|
|
from languages.custom_lang import CustomLanguage
|
|
|
|
from app.database import TableEpisodes, TableMovies
|
|
|
|
from utilities.path_mappings import path_mappings
|
2022-12-21 04:37:52 +00:00
|
|
|
from app.config import settings
|
2021-06-06 13:57:29 +00:00
|
|
|
|
2021-05-15 12:14:46 +00:00
|
|
|
|
|
|
|
def _handle_alpha3(detected_language: dict):
    """Return the alpha3 language code for an embedded subtitle stream.

    When a custom language mapping matches the probed stream, its alpha3
    code takes precedence over the official one reported by the parser.
    """
    official_alpha3 = detected_language["language"].alpha3

    custom_language = CustomLanguage.from_value(official_alpha3, "official_alpha3")

    # No custom mapping, or the probe data does not match it: keep the official code.
    if not custom_language or not custom_language.ffprobe_found(detected_language):
        return official_alpha3

    logging.debug("Custom embedded language found: %s", custom_language.name)
    return custom_language.alpha3
|
2021-05-15 12:14:46 +00:00
|
|
|
|
2021-05-06 03:07:23 +00:00
|
|
|
|
2021-06-11 21:32:12 +00:00
|
|
|
def _subtitle_streams_to_list(subtitle_streams):
    """Convert parsed subtitle stream dicts into Bazarr's list format.

    Each returned entry is ``[language_alpha3, forced, hearing_impaired, codec]``.
    Streams without a detected language and commentary tracks are skipped.
    """
    subtitles_list = []
    for detected_language in subtitle_streams:
        if "language" not in detected_language:
            continue

        # Avoid commentary subtitles
        name = detected_language.get("name", "").lower()
        if "commentary" in name:
            logging.debug("Ignoring commentary subtitle: %s", name)
            continue

        language = _handle_alpha3(detected_language)
        forced = detected_language.get("forced", False)
        hearing_impaired = detected_language.get("hearing_impaired", False)
        codec = detected_language.get("format")  # or None
        subtitles_list.append([language, forced, hearing_impaired, codec])
    return subtitles_list


def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None, use_cache=True):
    """Return the embedded subtitle tracks of a video file.

    :param file: path of the video file, as seen by Bazarr.
    :param file_size: size in bytes, used to validate the metadata cache.
    :param episode_file_id: Sonarr episode file id (mutually exclusive with movie_file_id).
    :param movie_file_id: Radarr movie file id.
    :param use_cache: whether to reuse the cached ffprobe/mediainfo result.
    :return: list of ``[language_alpha3, forced, hearing_impaired, codec]`` entries.
    """
    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id, use_cache=use_cache)

    # parse_video_metadata returns None when no ffprobe/mediainfo binary is
    # available; previously this crashed with a TypeError on subscripting.
    if not data:
        return []

    # Both parsers expose the same stream structure, so one extraction path
    # serves either source (the duplicated per-parser loops were merged).
    if data["ffprobe"] and "subtitle" in data["ffprobe"]:
        return _subtitle_streams_to_list(data["ffprobe"]["subtitle"])
    elif 'mediainfo' in data and data["mediainfo"] and "subtitle" in data["mediainfo"]:
        return _subtitle_streams_to_list(data["mediainfo"]["subtitle"])

    return []
|
2021-05-02 21:30:35 +00:00
|
|
|
|
2020-07-19 20:02:38 +00:00
|
|
|
|
2021-06-11 21:32:12 +00:00
|
|
|
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None, use_cache=True):
    """Probe a video file with ffprobe or mediainfo and cache the result.

    The result is pickled into the episodes/movies table; a cached value is
    reused when its file size and file id match and it already contains data
    for the currently configured parser.

    :param file: path of the video file, as seen by Bazarr.
    :param file_size: size in bytes, used to validate the cache.
    :param episode_file_id: Sonarr episode file id (mutually exclusive with movie_file_id).
    :param movie_file_id: Radarr movie file id.
    :param use_cache: whether to try the database cache before probing the file.
    :return: dict with "ffprobe", "mediainfo", "file_id" and "file_size" keys,
        or None when no suitable parser binary is available.
    """
    # Define default data keys value
    data = {
        "ffprobe": {},
        "mediainfo": {},
        "file_id": episode_file_id or movie_file_id,
        "file_size": file_size,
    }

    embedded_subs_parser = settings.general.embedded_subtitles_parser

    if use_cache:
        # Get the actual cache value from database
        if episode_file_id:
            cache_key = TableEpisodes.select(TableEpisodes.ffprobe_cache)\
                .where(TableEpisodes.path == path_mappings.path_replace_reverse(file))\
                .dicts()\
                .get_or_none()
        elif movie_file_id:
            cache_key = TableMovies.select(TableMovies.ffprobe_cache)\
                .where(TableMovies.path == path_mappings.path_replace_reverse_movie(file))\
                .dicts()\
                .get_or_none()
        else:
            cache_key = None

        # check if we have a value for that cache key
        try:
            # Unpickle ffprobe cache (cache_key may be None or hold no pickle;
            # any failure simply means we re-probe the file below)
            cached_value = pickle.loads(cache_key['ffprobe_cache'])
        except Exception:
            pass
        else:
            # Check if file size and file id matches and if so, we return the cached value if available for the
            # desired parser
            if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
                if embedded_subs_parser in cached_value and cached_value[embedded_subs_parser]:
                    return cached_value
                # else: no data cached for the desired parser; fall through and probe
            # else: cache must be renewed

    # if not, we retrieve the metadata from the file
    from utilities.binaries import get_binary

    ffprobe_path = mediainfo_path = None
    if embedded_subs_parser == 'ffprobe':
        ffprobe_path = get_binary("ffprobe")
    elif embedded_subs_parser == 'mediainfo':
        mediainfo_path = get_binary("mediainfo")

    # if we have ffprobe available
    if ffprobe_path:
        data["ffprobe"] = know(video_path=file, context={"provider": "ffmpeg", "ffmpeg": ffprobe_path})
    # or if we have mediainfo available
    elif mediainfo_path:
        data["mediainfo"] = know(video_path=file, context={"provider": "mediainfo", "mediainfo": mediainfo_path})
    # else, we warn user of missing binary
    else:
        logging.error("BAZARR require ffmpeg/ffprobe or mediainfo, please install it and make sure to choose it in "
                      "Settings-->Subtitles.")
        return

    # we write to db the result and return the newly cached ffprobe dict
    if episode_file_id:
        TableEpisodes.update({TableEpisodes.ffprobe_cache: pickle.dumps(data, pickle.HIGHEST_PROTOCOL)})\
            .where(TableEpisodes.path == path_mappings.path_replace_reverse(file))\
            .execute()
    elif movie_file_id:
        # BUGFIX: this previously used TableEpisodes.ffprobe_cache as the field
        # to update on the movies table (copy-paste from the episode branch).
        TableMovies.update({TableMovies.ffprobe_cache: pickle.dumps(data, pickle.HIGHEST_PROTOCOL)})\
            .where(TableMovies.path == path_mappings.path_replace_reverse_movie(file))\
            .execute()
    return data
|