bazarr/bazarr/list_subtitles.py

425 lines
19 KiB
Python
Raw Normal View History

# coding=utf-8
import gc
import os
import babelfish
import logging
import ast
import langdetect
import subliminal
import subliminal_patch
2019-08-20 10:23:25 +00:00
import operator
from subliminal import core
2018-11-29 13:53:13 +00:00
from subliminal_patch import search_external_subtitles
from bs4 import UnicodeDammit
from itertools import islice
2019-08-20 10:23:25 +00:00
from database import TableShows, TableEpisodes, TableMovies
from peewee import fn, JOIN
from get_args import args
from get_languages import alpha2_from_alpha3, get_language_set
2018-12-15 00:36:28 +00:00
from config import settings
from helper import path_replace, path_replace_movie, path_replace_reverse, \
path_replace_reverse_movie, get_subtitle_destination_folder
from queueconfig import notifications
from embedded_subs_reader import embedded_subs_reader
2018-12-31 17:49:11 +00:00
gc.enable()
def store_subtitles(file):
    """Index the subtitles available for an episode and store them in the DB.

    Embedded tracks are listed through ``embedded_subs_reader`` when the
    ``use_embedded_subs`` setting is enabled. External subtitles are then
    looked up with ``search_external_subtitles``. The resulting list is
    written, as a string, to ``TableEpisodes.subtitles`` for the row whose
    path equals ``path_replace_reverse(file)``.

    :param file: path of the episode video file.
    :return: list of ``[language, path]`` pairs; ``path`` is ``None`` for
        embedded subtitles. The list is empty when ``file`` does not exist.
    """
    logging.debug('BAZARR started subtitles indexing for this file: ' + file)
    actual_subtitles = []
    if os.path.exists(file):
        if settings.general.getboolean('use_embedded_subs'):
            logging.debug("BAZARR is trying to index embedded subtitles.")
            try:
                subtitle_languages = embedded_subs_reader.list_languages(file)
                for subtitle_language, subtitle_forced in subtitle_languages:
                    try:
                        alpha2 = alpha2_from_alpha3(subtitle_language)
                        if alpha2 is not None:
                            lang = str(alpha2)
                            if subtitle_forced:
                                lang = lang + ":forced"
                            logging.debug("BAZARR embedded subtitles detected: " + lang)
                            actual_subtitles.append([lang, None])
                    except Exception:
                        # Lazy %-formatting: the language code may not be a
                        # string, and concatenating it here would raise from
                        # inside the handler and stop the remaining tracks
                        # from being indexed.
                        logging.debug("BAZARR unable to index this unrecognized language: %s",
                                      subtitle_language)
            except Exception:
                logging.exception(
                    "BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(file)[1], file))

        # NOTE(review): "pb" / "pb.forced" have no leading dot, so any file
        # name ending with those letters matches - confirm this is intended.
        brazilian_portuguese = (".pt-br", ".pob", "pb")
        brazilian_portuguese_forced = (".pt-br.forced", ".pob.forced", "pb.forced")
        try:
            dest_folder = get_subtitle_destination_folder()
            subliminal_patch.core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
            subtitles = search_external_subtitles(file, languages=get_language_set(),
                                                  only_one=settings.general.getboolean('single_language'))
        except Exception:
            logging.exception("BAZARR unable to index external subtitles.")
        else:
            # items() instead of iteritems(): works on Python 2 and 3 alike.
            for subtitle, language in subtitles.items():
                subtitle_path = get_external_subtitles_path(file, subtitle)
                subtitle_stem = str(os.path.splitext(subtitle)[0]).lower()
                if subtitle_stem.endswith(brazilian_portuguese):
                    logging.debug("BAZARR external subtitles detected: " + "pb")
                    actual_subtitles.append([str("pb"), path_replace_reverse(subtitle_path)])
                elif subtitle_stem.endswith(brazilian_portuguese_forced):
                    logging.debug("BAZARR external subtitles detected: " + "pb:forced")
                    actual_subtitles.append([str("pb:forced"), path_replace_reverse(subtitle_path)])
                elif str(language) != 'und':
                    logging.debug("BAZARR external subtitles detected: " + str(language))
                    actual_subtitles.append([str(language), path_replace_reverse(subtitle_path)])
                elif os.path.splitext(subtitle)[1] != ".sub":
                    logging.debug("BAZARR falling back to file content analysis to detect language.")
                    # Analyse the file where it was actually found; only fall
                    # back to the video folder when it could not be resolved.
                    detection_path = subtitle_path or os.path.join(os.path.dirname(file), subtitle)
                    try:
                        # Raw bytes are needed for charset detection; the
                        # read sits inside the try so that an unreadable
                        # file does not abort the whole indexing.
                        with open(detection_path, 'rb') as f:
                            text = b' '.join(islice(f, 100))
                        encoding = UnicodeDammit(text)
                        text = text.decode(encoding.original_encoding)
                        detected_language = langdetect.detect(text)
                    except Exception:
                        logging.exception(
                            'BAZARR Error trying to detect language for this subtitles file: ' +
                            detection_path +
                            ' You should try to delete this subtitles file manually and ask Bazarr to download it again.')
                    else:
                        if len(detected_language) > 0:
                            logging.debug(
                                "BAZARR external subtitles detected and analysis guessed this language: " + str(
                                    detected_language))
                            actual_subtitles.append(
                                [str(detected_language), path_replace_reverse(detection_path)])

        update_count = TableEpisodes.update(
            {
                TableEpisodes.subtitles: str(actual_subtitles)
            }
        ).where(
            TableEpisodes.path == path_replace_reverse(file)
        ).execute()
        if update_count > 0:
            logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
        else:
            logging.debug("BAZARR haven't been able to update existing subtitles to DB : " + str(actual_subtitles))
    else:
        logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")

    logging.debug('BAZARR ended subtitles indexing for this file: ' + file)

    return actual_subtitles
def store_subtitles_movie(file):
    """Index the subtitles available for a movie and store them in the DB.

    Embedded tracks are listed through ``embedded_subs_reader`` when the
    ``use_embedded_subs`` setting is enabled. External subtitles are then
    looked up with ``search_external_subtitles``. The resulting list is
    written, as a string, to ``TableMovies.subtitles`` for the row whose
    path equals ``path_replace_reverse_movie(file)``.

    :param file: path of the movie video file.
    :return: list of ``[language, path]`` pairs; ``path`` is ``None`` for
        embedded subtitles. The list is empty when ``file`` does not exist.
    """
    logging.debug('BAZARR started subtitles indexing for this file: ' + file)
    actual_subtitles = []
    if os.path.exists(file):
        if settings.general.getboolean('use_embedded_subs'):
            logging.debug("BAZARR is trying to index embedded subtitles.")
            try:
                subtitle_languages = embedded_subs_reader.list_languages(file)
                for subtitle_language, subtitle_forced in subtitle_languages:
                    try:
                        alpha2 = alpha2_from_alpha3(subtitle_language)
                        if alpha2 is not None:
                            lang = str(alpha2)
                            if subtitle_forced:
                                lang = lang + ':forced'
                            logging.debug("BAZARR embedded subtitles detected: " + lang)
                            actual_subtitles.append([lang, None])
                    except Exception:
                        # Lazy %-formatting: the language code may not be a
                        # string, and concatenating it here would raise from
                        # inside the handler and stop the remaining tracks
                        # from being indexed.
                        logging.debug("BAZARR unable to index this unrecognized language: %s",
                                      subtitle_language)
            except Exception:
                logging.exception(
                    "BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(file)[1], file))

        dest_folder = get_subtitle_destination_folder() or ''
        subliminal_patch.core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
        # NOTE(review): "pb" / "pb.forced" have no leading dot, so any file
        # name ending with those letters matches - confirm this is intended.
        brazilian_portuguese = (".pt-br", ".pob", "pb")
        brazilian_portuguese_forced = (".pt-br.forced", ".pob.forced", "pb.forced")
        try:
            subtitles = search_external_subtitles(file, languages=get_language_set(),
                                                  only_one=settings.general.getboolean('single_language'))
        except Exception:
            logging.exception("BAZARR unable to index external subtitles.")
        else:
            # items() instead of iteritems(): works on Python 2 and 3 alike.
            for subtitle, language in subtitles.items():
                # Resolve the real location the same way store_subtitles
                # does (video folder first, then the custom folder). When it
                # cannot be resolved, keep the previous behaviour of joining
                # the video folder with the destination folder.
                subtitle_path = (get_external_subtitles_path(file, subtitle) or
                                 os.path.join(os.path.dirname(file), dest_folder, subtitle))
                subtitle_stem = str(os.path.splitext(subtitle)[0]).lower()
                if subtitle_stem.endswith(brazilian_portuguese):
                    logging.debug("BAZARR external subtitles detected: " + "pb")
                    actual_subtitles.append([str("pb"), path_replace_reverse_movie(subtitle_path)])
                elif subtitle_stem.endswith(brazilian_portuguese_forced):
                    logging.debug("BAZARR external subtitles detected: " + "pb:forced")
                    actual_subtitles.append([str("pb:forced"), path_replace_reverse_movie(subtitle_path)])
                elif str(language) != 'und':
                    logging.debug("BAZARR external subtitles detected: " + str(language))
                    actual_subtitles.append([str(language), path_replace_reverse_movie(subtitle_path)])
                elif os.path.splitext(subtitle)[1] != ".sub":
                    logging.debug("BAZARR falling back to file content analysis to detect language.")
                    try:
                        # Raw bytes are needed for charset detection; the
                        # read sits inside the try so that an unreadable
                        # file does not abort the whole indexing.
                        with open(subtitle_path, 'rb') as f:
                            text = b' '.join(islice(f, 100))
                        encoding = UnicodeDammit(text)
                        text = text.decode(encoding.original_encoding)
                        detected_language = langdetect.detect(text)
                    except Exception:
                        # Report the path that was actually analysed.
                        logging.exception(
                            'BAZARR Error trying to detect language for this subtitles file: ' +
                            subtitle_path +
                            ' You should try to delete this subtitles file manually and ask Bazarr to download it again.')
                    else:
                        if len(detected_language) > 0:
                            logging.debug(
                                "BAZARR external subtitles detected and analysis guessed this language: " + str(
                                    detected_language))
                            actual_subtitles.append(
                                [str(detected_language), path_replace_reverse_movie(subtitle_path)])

        update_count = TableMovies.update(
            {
                TableMovies.subtitles: str(actual_subtitles)
            }
        ).where(
            TableMovies.path == path_replace_reverse_movie(file)
        ).execute()
        if update_count > 0:
            logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
        else:
            logging.debug("BAZARR haven't been able to update existing subtitles to DB : " + str(actual_subtitles))
    else:
        logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")

    logging.debug('BAZARR ended subtitles indexing for this file: ' + file)

    return actual_subtitles
2019-08-20 17:07:16 +00:00
def list_missing_subtitles(no=None):
    """Recompute and store the ``missing_subtitles`` column of episodes.

    For each selected episode the desired languages (``TableShows.languages``,
    adjusted by ``TableShows.forced``) are compared with the languages found
    in ``TableEpisodes.subtitles``; the difference is written back as a
    string.

    :param no: Sonarr series id restricting the update to one series. When
        ``None``, every episode joined to a series with a non-null id is
        processed.
    """
    if no is not None:
        # Exact match on the id (as series_scan_subtitles does) rather than
        # an ILIKE pattern match.
        episodes_subtitles_clause = (TableShows.sonarr_series_id == no)
    else:
        episodes_subtitles_clause = TableShows.sonarr_series_id.is_null(False)

    episodes_subtitles = TableEpisodes.select(
        TableEpisodes.sonarr_episode_id,
        TableEpisodes.subtitles,
        TableShows.languages,
        TableShows.forced
    ).join_from(
        TableEpisodes, TableShows, JOIN.LEFT_OUTER
    ).where(
        episodes_subtitles_clause
    ).objects()

    # Indexed subtitles use "pt-BR" where desired languages use "pb".
    language_aliases = {"pt-BR": "pb", "pt-BR:forced": "pb:forced"}

    missing_subtitles_global = []
    use_embedded_subs = settings.general.getboolean('use_embedded_subs')
    for episode_subtitles in episodes_subtitles:
        actual_subtitles = []
        if episode_subtitles.subtitles is not None:
            actual_subtitles = ast.literal_eval(episode_subtitles.subtitles)
            if not use_embedded_subs:
                # Entries without a path are embedded tracks: ignore them.
                actual_subtitles = [subtitle for subtitle in actual_subtitles
                                    if subtitle[1] is not None]

        desired_subtitles = []
        if episode_subtitles.languages is not None:
            # The stored literal may itself evaluate to None.
            desired_subtitles = ast.literal_eval(episode_subtitles.languages)
            if desired_subtitles is not None:
                if episode_subtitles.forced == "True":
                    desired_subtitles = [language + ":forced" for language in desired_subtitles]
                elif episode_subtitles.forced == "Both":
                    desired_subtitles = [variant
                                         for language in desired_subtitles
                                         for variant in (language, language + ":forced")]

        if desired_subtitles is None:
            missing_subtitles = '[]'
        else:
            actual_languages = set(language_aliases.get(item[0], item[0])
                                   for item in actual_subtitles)
            missing_subtitles = str(list(set(desired_subtitles) - actual_languages))
        missing_subtitles_global.append((missing_subtitles, episode_subtitles.sonarr_episode_id))

    for missing_subtitles, sonarr_episode_id in missing_subtitles_global:
        TableEpisodes.update(
            {
                TableEpisodes.missing_subtitles: missing_subtitles
            }
        ).where(
            TableEpisodes.sonarr_episode_id == sonarr_episode_id
        ).execute()
2019-08-20 17:07:16 +00:00
def list_missing_subtitles_movies(no=None):
    """Recompute and store the ``missing_subtitles`` column of movies.

    For each selected movie the desired languages (``TableMovies.languages``,
    adjusted by ``TableMovies.forced``) are compared with the languages found
    in ``TableMovies.subtitles``; the difference is written back as a string.

    :param no: Radarr id restricting the update to one movie. When ``None``,
        every movie with a non-null Radarr id is processed.
    """
    if no is not None:
        # Exact match on the id (as movies_scan_subtitles does) rather than
        # an ILIKE pattern match.
        movies_subtitles_clause = (TableMovies.radarr_id == no)
    else:
        movies_subtitles_clause = TableMovies.radarr_id.is_null(False)

    movies_subtitles = TableMovies.select(
        TableMovies.radarr_id,
        TableMovies.subtitles,
        TableMovies.languages,
        TableMovies.forced
    ).where(
        movies_subtitles_clause
    )

    # Indexed subtitles use "pt-BR" where desired languages use "pb".
    language_aliases = {"pt-BR": "pb", "pt-BR:forced": "pb:forced"}

    missing_subtitles_global = []
    use_embedded_subs = settings.general.getboolean('use_embedded_subs')
    for movie_subtitles in movies_subtitles:
        actual_subtitles = []
        if movie_subtitles.subtitles is not None:
            actual_subtitles = ast.literal_eval(movie_subtitles.subtitles)
            if not use_embedded_subs:
                # Entries without a path are embedded tracks: ignore them.
                actual_subtitles = [subtitle for subtitle in actual_subtitles
                                    if subtitle[1] is not None]

        desired_subtitles = []
        if movie_subtitles.languages is not None:
            # The stored literal may itself evaluate to None.
            desired_subtitles = ast.literal_eval(movie_subtitles.languages)
            if desired_subtitles is not None:
                if movie_subtitles.forced == "True":
                    desired_subtitles = [language + ":forced" for language in desired_subtitles]
                elif movie_subtitles.forced == "Both":
                    desired_subtitles = [variant
                                         for language in desired_subtitles
                                         for variant in (language, language + ":forced")]

        if desired_subtitles is None:
            missing_subtitles = '[]'
        else:
            actual_languages = set(language_aliases.get(item[0], item[0])
                                   for item in actual_subtitles)
            missing_subtitles = str(list(set(desired_subtitles) - actual_languages))
        missing_subtitles_global.append((missing_subtitles, movie_subtitles.radarr_id))

    for missing_subtitles, radarr_id in missing_subtitles_global:
        TableMovies.update(
            {
                TableMovies.missing_subtitles: missing_subtitles
            }
        ).where(
            TableMovies.radarr_id == radarr_id
        ).execute()
def series_full_scan_subtitles():
    """Re-index the subtitles of every episode stored in ``TableEpisodes``.

    A progress notification is written before each episode is handed to
    ``store_subtitles``; a garbage collection is requested once all episodes
    have been processed.
    """
    episode_rows = TableEpisodes.select(TableEpisodes.path)
    total = episode_rows.count()

    position = 0
    for row in episode_rows:
        position += 1
        notifications.write(
            msg='Updating all episodes subtitles from disk...',
            queue='list_subtitles_series',
            item=position,
            length=total)
        local_path = path_replace(row.path)
        store_subtitles(local_path)

    gc.collect()
def movies_full_scan_subtitles():
    """Re-index the subtitles of every movie stored in ``TableMovies``.

    A progress notification is written before each movie is handed to
    ``store_subtitles_movie``; a garbage collection is requested once all
    movies have been processed.
    """
    movie_rows = TableMovies.select(TableMovies.path)
    total = movie_rows.count()

    position = 0
    for row in movie_rows:
        position += 1
        notifications.write(
            msg='Updating all movies subtitles from disk...',
            queue='list_subtitles_movies',
            item=position,
            length=total)
        local_path = path_replace_movie(row.path)
        store_subtitles_movie(local_path)

    gc.collect()
def series_scan_subtitles(no):
    """Re-index the subtitles of one series, then refresh its missing list.

    :param no: Sonarr series id whose episodes are passed to
        ``store_subtitles`` before ``list_missing_subtitles(no)`` is called.
    """
    belongs_to_series = (TableEpisodes.sonarr_series_id == no)
    episode_rows = TableEpisodes.select(TableEpisodes.path).where(belongs_to_series)

    for row in episode_rows:
        local_path = path_replace(row.path)
        store_subtitles(local_path)

    list_missing_subtitles(no)
def movies_scan_subtitles(no):
    """Re-index the subtitles of one movie, then refresh its missing list.

    :param no: Radarr id whose rows are passed to ``store_subtitles_movie``
        before ``list_missing_subtitles_movies(no)`` is called.
    """
    is_requested_movie = (TableMovies.radarr_id == no)
    movie_rows = TableMovies.select(TableMovies.path).where(is_requested_movie)

    for row in movie_rows:
        local_path = path_replace_movie(row.path)
        store_subtitles_movie(local_path)

    list_missing_subtitles_movies(no)
def get_external_subtitles_path(file, subtitle):
    """Return the path of an external subtitles file for a given video.

    The lookup depends on ``settings.general.subfolder``:

    * ``"current"``: the subtitle is assumed to sit in the video's folder;
      no existence check is made.
    * ``"absolute"``: the video's folder is probed first, then
      ``settings.general.subfolder_custom``.
    * ``"relative"``: the video's folder is probed first, then
      ``subfolder_custom`` joined to the video's folder.

    :param file: path of the video file.
    :param subtitle: file name of the subtitles file.
    :return: the resolved path, or ``None`` when the file exists in neither
        probed folder or the ``subfolder`` setting has another value.
    """
    video_folder = os.path.dirname(file)
    mode = settings.general.subfolder

    if mode == "current":
        return os.path.join(video_folder, subtitle)

    if mode == "absolute":
        custom_folder = settings.general.subfolder_custom
    elif mode == "relative":
        custom_folder = os.path.join(video_folder, settings.general.subfolder_custom)
    else:
        return None

    # The video's own folder takes precedence over the custom folder.
    for folder in (video_folder, custom_folder):
        candidate = os.path.join(folder, subtitle)
        if os.path.exists(candidate):
            return candidate
    return None