Tried to improve the resource usage of the daily full-disk indexing tasks. #1916

This commit is contained in:
morpheus65535 2022-08-10 22:34:06 -04:00
parent b0abe81d12
commit c74f9e9602
3 changed files with 63 additions and 15 deletions

View File

@ -55,7 +55,7 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True):
if subtitle_hi:
lang = lang + ':hi'
logging.debug("BAZARR embedded subtitles detected: " + lang)
actual_subtitles.append([lang, None])
actual_subtitles.append([lang, None, None])
except Exception:
logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_language)
pass
@ -68,6 +68,22 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True):
try:
dest_folder = get_subtitle_destination_folder() or ''
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
# get previously indexed subtitles that haven't changed:
item = TableMovies.select(TableMovies.subtitles) \
.where(TableMovies.path == original_path) \
.dicts() \
.get_or_none()
if not item:
previously_indexed_subtitles_to_exclude = []
else:
previously_indexed_subtitles = ast.literal_eval(item['subtitles'])
previously_indexed_subtitles_to_exclude = [x for x in previously_indexed_subtitles
if len(x) == 3 and
x[1] and
os.path.isfile(path_mappings.path_replace(x[1])) and
os.stat(path_mappings.path_replace(x[1])).st_size == x[2]]
subtitles = search_external_subtitles(reversed_path, languages=get_language_set())
full_dest_folder_path = os.path.dirname(reversed_path)
if dest_folder:
@ -75,7 +91,8 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True):
full_dest_folder_path = dest_folder
elif settings.general.subfolder == "relative":
full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder)
subtitles = guess_external_subtitles(full_dest_folder_path, subtitles)
subtitles = guess_external_subtitles(full_dest_folder_path, subtitles, "movie",
previously_indexed_subtitles_to_exclude)
except Exception:
logging.exception("BAZARR unable to index external subtitles.")
pass
@ -107,7 +124,8 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True):
else:
language_str = str(language)
logging.debug("BAZARR external subtitles detected: " + language_str)
actual_subtitles.append([language_str, path_mappings.path_replace_reverse_movie(subtitle_path)])
actual_subtitles.append([language_str, path_mappings.path_replace_reverse_movie(subtitle_path),
os.stat(subtitle_path).st_size])
TableMovies.update({TableMovies.subtitles: str(actual_subtitles)})\
.where(TableMovies.path == original_path)\

View File

@ -55,7 +55,7 @@ def store_subtitles(original_path, reversed_path, use_cache=True):
if subtitle_hi:
lang = lang + ":hi"
logging.debug("BAZARR embedded subtitles detected: " + lang)
actual_subtitles.append([lang, None])
actual_subtitles.append([lang, None, None])
except Exception as error:
logging.debug("BAZARR unable to index this unrecognized language: %s (%s)", subtitle_language, error)
except Exception:
@ -66,6 +66,22 @@ def store_subtitles(original_path, reversed_path, use_cache=True):
try:
dest_folder = get_subtitle_destination_folder()
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
# get previously indexed subtitles that haven't changed:
item = TableEpisodes.select(TableEpisodes.subtitles) \
.where(TableEpisodes.path == original_path) \
.dicts() \
.get_or_none()
if not item:
previously_indexed_subtitles_to_exclude = []
else:
previously_indexed_subtitles = ast.literal_eval(item['subtitles'])
previously_indexed_subtitles_to_exclude = [x for x in previously_indexed_subtitles
if len(x) == 3 and
x[1] and
os.path.isfile(path_mappings.path_replace(x[1])) and
os.stat(path_mappings.path_replace(x[1])).st_size == x[2]]
subtitles = search_external_subtitles(reversed_path, languages=get_language_set(),
only_one=settings.general.getboolean('single_language'))
full_dest_folder_path = os.path.dirname(reversed_path)
@ -74,7 +90,8 @@ def store_subtitles(original_path, reversed_path, use_cache=True):
full_dest_folder_path = dest_folder
elif settings.general.subfolder == "relative":
full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder)
subtitles = guess_external_subtitles(full_dest_folder_path, subtitles)
subtitles = guess_external_subtitles(full_dest_folder_path, subtitles, "series",
previously_indexed_subtitles_to_exclude)
except Exception:
logging.exception("BAZARR unable to index external subtitles.")
else:
@ -105,7 +122,8 @@ def store_subtitles(original_path, reversed_path, use_cache=True):
else:
language_str = str(language)
logging.debug("BAZARR external subtitles detected: " + language_str)
actual_subtitles.append([language_str, path_mappings.path_replace_reverse(subtitle_path)])
actual_subtitles.append([language_str, path_mappings.path_replace_reverse(subtitle_path),
os.stat(subtitle_path).st_size])
TableEpisodes.update({TableEpisodes.subtitles: str(actual_subtitles)})\
.where(TableEpisodes.path == original_path)\

View File

@ -11,6 +11,7 @@ from charamel import Detector
from app.config import settings
from constants import hi_regex
from utilities.path_mappings import path_mappings
def get_external_subtitles_path(file, subtitle):
@ -40,16 +41,27 @@ def get_external_subtitles_path(file, subtitle):
return path
def guess_external_subtitles(dest_folder, subtitles):
def guess_external_subtitles(dest_folder, subtitles, media_type, previously_indexed_subtitles_to_exclude=None):
for subtitle, language in subtitles.items():
subtitle_path = os.path.join(dest_folder, subtitle)
reversed_subtitle_path = path_mappings.path_replace_reverse(subtitle_path) if media_type == "series" \
else path_mappings.path_replace_reverse_movie(subtitle_path)
if previously_indexed_subtitles_to_exclude:
if [x for x in previously_indexed_subtitles_to_exclude
if x[1] == reversed_subtitle_path and x[2] == os.stat(subtitle_path).st_size]:
continue
if not language:
subtitle_path = os.path.join(dest_folder, subtitle)
if os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
logging.debug("BAZARR falling back to file content analysis to detect language.")
detected_language = None
# detect forced subtitles
forced = True if os.path.splitext(os.path.splitext(subtitle)[0])[1] == '.forced' else False
# to improve performance, skip detection of files larger than 1M
if os.path.getsize(subtitle_path) > 1*1024*1024:
if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
subtitle_path)
continue
@ -92,21 +104,21 @@ def guess_external_subtitles(dest_folder, subtitles):
logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
detected_language))
try:
subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=False,
subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=forced,
hi=False)
except Exception:
pass
# If language is still None (undetected), skip it
if not language:
pass
if hasattr(subtitles[subtitle], 'basename') and not subtitles[subtitle].basename:
continue
# Skip HI detection if forced
elif language.forced:
pass
if hasattr(language, 'forced') and language.forced:
continue
# Detect hearing-impaired external subtitles not identified in filename
elif not subtitles[subtitle].hi:
if hasattr(subtitles[subtitle], 'hi') and not subtitles[subtitle].hi:
subtitle_path = os.path.join(dest_folder, subtitle)
# check if file exists: