bazarr/bazarr/list_subtitles.py

513 lines
25 KiB
Python
Raw Normal View History

# coding=utf-8
import gc
import os
import logging
import ast
2020-09-10 18:26:37 +00:00
import re
from guess_language import guess_language
from subliminal_patch import core, search_external_subtitles
2019-11-04 02:48:21 +00:00
from subzero.language import Language
2019-10-23 10:59:04 +00:00
from database import database
from get_languages import alpha2_from_alpha3, get_language_set
2018-12-15 00:36:28 +00:00
from config import settings
2020-05-19 13:27:13 +00:00
from helper import path_mappings, get_subtitle_destination_folder
from embedded_subs_reader import embedded_subs_reader
2020-05-12 12:25:03 +00:00
from event_handler import event_stream
from charamel import Detector
2018-12-31 17:49:11 +00:00
# Make sure automatic garbage collection is switched on for this process.
gc.enable()

# Compiled once at import time. It matches either text of three or more
# characters wrapped in * ¶ ♫ ♪ markers, or text of three or more characters
# wrapped in (), [] or {}. The trailing look-behind rejects a bracketed match
# that ends with a ``{\anN}`` tag (N being a single digit).
hi_regex = re.compile(r'[*¶♫♪].{3,}[*¶♫♪]|[\[\(\{].{3,}[\]\)\}](?<!{\\an\d})')
2020-09-10 18:26:37 +00:00
2019-10-11 04:00:23 +00:00
# File-name endings (checked against the lower-cased name without its
# extension) that mark a subtitle as Brazilian Portuguese.
# NOTE(review): "pb" / "pb.forced" have no leading dot, so any name ending
# with those letters matches -- kept as-is, confirm whether that is intended.
_BRAZILIAN_PORTUGUESE = (".pt-br", ".pob", "pb")
_BRAZILIAN_PORTUGUESE_FORCED = (".pt-br.forced", ".pob.forced", "pb.forced")


def _list_embedded_subtitles(reversed_path):
    """Return ``[language, None]`` entries for the subtitle tracks embedded in a video.

    Tracks whose codec is "pgs" or "vobsub" are skipped when the matching
    ``ignore_pgs_subs`` / ``ignore_vobsub_subs`` setting is enabled, and tracks
    whose language has no alpha2 equivalent are ignored. Failures are logged
    and whatever was collected so far is returned.
    """
    embedded = []
    logging.debug("BAZARR is trying to index embedded subtitles.")
    try:
        subtitle_languages = embedded_subs_reader.list_languages(reversed_path)
        for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
            try:
                if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
                        (settings.general.getboolean("ignore_vobsub_subs") and
                         subtitle_codec.lower() == "vobsub"):
                    logging.debug("BAZARR skipping %s sub for language: %s",
                                  subtitle_codec, alpha2_from_alpha3(subtitle_language))
                    continue

                alpha2 = alpha2_from_alpha3(subtitle_language)
                if alpha2 is None:
                    continue

                lang = str(alpha2)
                if subtitle_forced:
                    lang = lang + ":forced"
                if subtitle_hi:
                    lang = lang + ":hi"
                logging.debug("BAZARR embedded subtitles detected: " + lang)
                embedded.append([lang, None])
            except Exception:
                # %s formatting on purpose: the language may be None here, and
                # string concatenation would raise inside this handler and
                # abort the indexing of the remaining tracks.
                logging.debug("BAZARR unable to index this unrecognized language: %s", subtitle_language)
    except Exception:
        logging.exception("BAZARR error when trying to analyze this %s file: %s",
                          os.path.splitext(reversed_path)[1], reversed_path)
    return embedded


def _list_external_subtitles(reversed_path, reverse_path, honor_single_language):
    """Return ``[language, path]`` entries for the external subtitles of a video.

    :param reversed_path: path of the video file as seen by this process.
    :param reverse_path: callable applied to each subtitle path before it is
        stored (one of the ``path_mappings.path_replace_reverse*`` methods).
    :param honor_single_language: when true, the ``single_language`` setting
        is forwarded to ``search_external_subtitles`` as ``only_one``.

    If the search or the content-based guessing fails, the error is logged
    and an empty list is returned.
    """
    external = []
    try:
        dest_folder = get_subtitle_destination_folder()
        core.CUSTOM_PATHS = [dest_folder] if dest_folder else []

        search_options = {}
        if honor_single_language:
            search_options['only_one'] = settings.general.getboolean('single_language')
        subtitles = search_external_subtitles(reversed_path, languages=get_language_set(), **search_options)

        video_folder = os.path.dirname(reversed_path)
        full_dest_folder_path = video_folder
        if dest_folder:
            if settings.general.subfolder == "absolute":
                full_dest_folder_path = dest_folder
            elif settings.general.subfolder == "relative":
                full_dest_folder_path = os.path.join(video_folder, dest_folder)
        subtitles = guess_external_subtitles(full_dest_folder_path, subtitles)
    except Exception:
        logging.exception("BAZARR unable to index external subtitles.")
        return external

    for subtitle, language in subtitles.items():
        subtitle_path = get_external_subtitles_path(reversed_path, subtitle)
        name_without_ext = str(os.path.splitext(subtitle)[0]).lower()

        if name_without_ext.endswith(_BRAZILIAN_PORTUGUESE):
            language_str = "pb"
        elif name_without_ext.endswith(_BRAZILIAN_PORTUGUESE_FORCED):
            language_str = "pb:forced"
        elif not language or str(language.basename) == 'und':
            # No usable language: nothing to record for this file.
            continue
        else:
            language_str = str(language)
            # ":hi" is only appended when the subtitle isn't flagged as forced.
            if language.hi and not language.forced:
                language_str += ':hi'

        logging.debug("BAZARR external subtitles detected: " + language_str)
        external.append([language_str, reverse_path(subtitle_path)])
    return external


def store_subtitles(original_path, reversed_path):
    """Index the subtitles of an episode file and store the result in the database.

    :param original_path: value of the ``path`` column in ``table_episodes``.
    :param reversed_path: the same file as a path this process can access.
    :return: list of ``[language, subtitle_path]`` pairs; embedded tracks have
        ``None`` as path. Empty when the file does not exist.

    When the file exists, the list is written to ``table_episodes.subtitles``
    and ``list_missing_subtitles`` is run for every matching episode.
    """
    logging.debug('BAZARR started subtitles indexing for this file: ' + reversed_path)
    actual_subtitles = []
    if os.path.exists(reversed_path):
        if settings.general.getboolean('use_embedded_subs'):
            actual_subtitles.extend(_list_embedded_subtitles(reversed_path))
        actual_subtitles.extend(
            _list_external_subtitles(reversed_path, path_mappings.path_replace_reverse,
                                     honor_single_language=True))

        database.execute("UPDATE table_episodes SET subtitles=? WHERE path=?",
                         (str(actual_subtitles), original_path))
        matching_episodes = database.execute("SELECT sonarrEpisodeId, sonarrSeriesId FROM table_episodes WHERE path=?",
                                             (original_path,))

        for episode in matching_episodes:
            if episode:
                logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
                list_missing_subtitles(epno=episode['sonarrEpisodeId'])
            else:
                logging.debug("BAZARR haven't been able to update existing subtitles to DB : " + str(actual_subtitles))
    else:
        logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")

    logging.debug('BAZARR ended subtitles indexing for this file: ' + reversed_path)

    return actual_subtitles


def store_subtitles_movie(original_path, reversed_path):
    """Index the subtitles of a movie file and store the result in the database.

    :param original_path: value of the ``path`` column in ``table_movies``.
    :param reversed_path: the same file as a path this process can access.
    :return: list of ``[language, subtitle_path]`` pairs; embedded tracks have
        ``None`` as path. Empty when the file does not exist.

    When the file exists, the list is written to ``table_movies.subtitles``
    and ``list_missing_subtitles_movies`` is run for every matching movie.
    """
    logging.debug('BAZARR started subtitles indexing for this file: ' + reversed_path)
    actual_subtitles = []
    if os.path.exists(reversed_path):
        if settings.general.getboolean('use_embedded_subs'):
            actual_subtitles.extend(_list_embedded_subtitles(reversed_path))
        # NOTE(review): unlike episodes, the movie search has never passed the
        # ``single_language`` setting; kept that way -- confirm it is intended.
        actual_subtitles.extend(
            _list_external_subtitles(reversed_path, path_mappings.path_replace_reverse_movie,
                                     honor_single_language=False))

        database.execute("UPDATE table_movies SET subtitles=? WHERE path=?",
                         (str(actual_subtitles), original_path))
        matching_movies = database.execute("SELECT radarrId FROM table_movies WHERE path=?", (original_path,))

        for movie in matching_movies:
            if movie:
                logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
                list_missing_subtitles_movies(no=movie['radarrId'])
            else:
                logging.debug("BAZARR haven't been able to update existing subtitles to DB : " + str(actual_subtitles))
    else:
        logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")

    logging.debug('BAZARR ended subtitles indexing for this file: ' + reversed_path)

    return actual_subtitles
def _missing_subtitles_for(row, use_embedded_subs):
    """Return ``str()`` of the list of desired languages ``row`` has no subtitles for.

    :param row: database row exposing the ``subtitles``, ``languages``,
        ``forced`` and ``hearing_impaired`` columns by key.
    :param use_embedded_subs: when false, entries whose path is ``None``
        (embedded tracks) are not counted as existing subtitles.
    """
    actual_subtitles = []
    if row['subtitles'] is not None:
        actual_subtitles = ast.literal_eval(row['subtitles']) or []
        if not use_embedded_subs:
            actual_subtitles = [subtitle for subtitle in actual_subtitles if subtitle[1] is not None]

    desired_subtitles = []
    if row['languages'] is not None:
        desired_subtitles = ast.literal_eval(row['languages'])
    if desired_subtitles is None:
        # The column holds the literal text "None": nothing can be missing.
        return '[]'

    # Comprehensions cope with an empty list, whereas iterating the previous
    # ``None`` placeholder raised TypeError when no languages were stored.
    if row['hearing_impaired'] == "True":
        desired_subtitles = [language + ":hi" for language in desired_subtitles]
    elif row['forced'] == "True":
        desired_subtitles = [language + ":forced" for language in desired_subtitles]
    elif row['forced'] == "Both":
        desired_subtitles = [variant
                             for language in desired_subtitles
                             for variant in (language, language + ":forced")]

    # Existing "pt-BR" subtitles are compared under the "pb" code.
    renamed = {"pt-BR": "pb", "pt-BR:forced": "pb:forced"}
    available = {renamed.get(item[0], item[0]) for item in actual_subtitles}

    # A language also counts as present when its ":hi" variant exists.
    # dict.fromkeys() removes duplicates while keeping the desired order, so
    # the stored string is deterministic.
    missing_subtitles = [language for language in dict.fromkeys(desired_subtitles)
                         if language not in available and language + ':hi' not in available]
    return str(missing_subtitles)


def list_missing_subtitles(no=None, epno=None, send_event=True):
    """Recompute and store ``missing_subtitles`` for episodes.

    :param no: restrict the update to this ``sonarrSeriesId`` (takes
        precedence over ``epno``).
    :param epno: restrict the update to this ``sonarrEpisodeId``.
    :param send_event: when true, emit an episode update event and a
        ``badges_series`` event after each updated episode.

    With neither ``no`` nor ``epno`` every episode is processed. Returns
    ``None``; the function stops early if the query returns a string instead
    of rows.
    """
    query = ("SELECT table_shows.sonarrSeriesId, table_episodes.sonarrEpisodeId, "
             "table_episodes.subtitles, table_shows.languages, table_shows.forced, "
             "table_shows.hearing_impaired FROM table_episodes LEFT JOIN table_shows "
             "on table_episodes.sonarrSeriesId = table_shows.sonarrSeriesId")
    # Bind the id as a parameter instead of concatenating it into the SQL.
    if no is not None:
        episodes_subtitles = database.execute(query + " WHERE table_episodes.sonarrSeriesId=?", (no,))
    elif epno is not None:
        episodes_subtitles = database.execute(query + " WHERE table_episodes.sonarrEpisodeId=?", (epno,))
    else:
        episodes_subtitles = database.execute(query)

    if isinstance(episodes_subtitles, str):
        logging.error("BAZARR list missing subtitles query to DB returned this instead of rows: " + episodes_subtitles)
        return

    use_embedded_subs = settings.general.getboolean('use_embedded_subs')
    missing_subtitles_global = [
        (_missing_subtitles_for(episode_subtitles, use_embedded_subs),
         episode_subtitles['sonarrEpisodeId'],
         episode_subtitles['sonarrSeriesId'])
        for episode_subtitles in episodes_subtitles
    ]

    for missing_subtitles, episode_id, series_id in missing_subtitles_global:
        database.execute("UPDATE table_episodes SET missing_subtitles=? WHERE sonarrEpisodeId=?",
                         (missing_subtitles, episode_id))

        if send_event:
            event_stream(type='episode', action='update', series=series_id, episode=episode_id)
            event_stream(type='badges_series')


def list_missing_subtitles_movies(no=None, send_event=True):
    """Recompute and store ``missing_subtitles`` for movies.

    :param no: restrict the update to this ``radarrId``; every movie is
        processed when omitted.
    :param send_event: when true, emit a movie update event and a
        ``badges_movies`` event after each updated movie.

    Returns ``None``; the function stops early if the query returns a string
    instead of rows.
    """
    query = "SELECT radarrId, subtitles, languages, forced, hearing_impaired FROM table_movies"
    # Bind the id as a parameter instead of concatenating it into the SQL.
    if no is not None:
        movies_subtitles = database.execute(query + " WHERE radarrId=?", (no,))
    else:
        movies_subtitles = database.execute(query)

    if isinstance(movies_subtitles, str):
        logging.error("BAZARR list missing subtitles query to DB returned this instead of rows: " + movies_subtitles)
        return

    use_embedded_subs = settings.general.getboolean('use_embedded_subs')
    missing_subtitles_global = [
        (_missing_subtitles_for(movie_subtitles, use_embedded_subs), movie_subtitles['radarrId'])
        for movie_subtitles in movies_subtitles
    ]

    for missing_subtitles, movie_id in missing_subtitles_global:
        database.execute("UPDATE table_movies SET missing_subtitles=? WHERE radarrId=?",
                         (missing_subtitles, movie_id))

        if send_event:
            event_stream(type='movie', action='update', movie=movie_id)
            event_stream(type='badges_movies')
2020-02-12 17:41:40 +00:00
def series_full_scan_subtitles():
    """Index the subtitles of every episode path stored in ``table_episodes``.

    A garbage collection is triggered once the whole table has been processed.
    """
    for row in database.execute("SELECT path FROM table_episodes"):
        store_subtitles(row['path'], path_mappings.path_replace(row['path']))

    gc.collect()
def movies_full_scan_subtitles():
    """Index the subtitles of every movie path stored in ``table_movies``.

    A garbage collection is triggered once the whole table has been processed.
    """
    for row in database.execute("SELECT path FROM table_movies"):
        store_subtitles_movie(row['path'], path_mappings.path_replace_movie(row['path']))

    gc.collect()
def series_scan_subtitles(no):
    """Index the subtitles of all episodes of one series.

    :param no: ``sonarrSeriesId`` of the series; its episodes are processed
        in ``sonarrEpisodeId`` order.
    """
    rows = database.execute(
        "SELECT path FROM table_episodes WHERE sonarrSeriesId=? ORDER BY sonarrEpisodeId",
        (no,))

    for row in rows:
        store_subtitles(row['path'], path_mappings.path_replace(row['path']))
def movies_scan_subtitles(no):
    """Index the subtitles of the movie(s) stored under one ``radarrId``.

    :param no: ``radarrId`` used to select rows from ``table_movies``.
    """
    rows = database.execute("SELECT path FROM table_movies WHERE radarrId=? ORDER BY radarrId", (no,))

    for row in rows:
        store_subtitles_movie(row['path'], path_mappings.path_replace_movie(row['path']))
def get_external_subtitles_path(file, subtitle):
    """Return the full path of an external subtitle file, or ``None``.

    :param file: path of the video file.
    :param subtitle: file name of the subtitle.

    The result depends on ``settings.general.subfolder``:

    * ``"current"``: the subtitle path next to the video, without checking
      that it exists;
    * ``"absolute"`` / ``"relative"``: the path next to the video if it
      exists, otherwise the path inside ``subfolder_custom`` (taken as-is, or
      joined to the video folder for ``"relative"``) if that exists,
      otherwise ``None``;
    * any other value: ``None``.
    """
    video_folder = os.path.dirname(file)
    beside_video = os.path.join(video_folder, subtitle)
    mode = settings.general.subfolder

    if mode == "current":
        return beside_video

    if mode == "absolute":
        custom_folder = settings.general.subfolder_custom
    elif mode == "relative":
        custom_folder = os.path.join(video_folder, settings.general.subfolder_custom)
    else:
        return None

    # The video's own folder takes precedence over the custom folder.
    if os.path.exists(beside_video):
        return beside_video

    in_custom_folder = os.path.join(custom_folder, subtitle)
    if os.path.exists(in_custom_folder):
        return in_custom_folder

    return None
2019-11-04 02:48:21 +00:00
# To keep the scan fast, files larger than 1 MiB are treated as not being
# text based and their content is never inspected.
_MAX_TEXT_SUBTITLE_SIZE = 1 * 1024 * 1024


def _read_subtitle_text(subtitle_path):
    """Return the decoded content of a subtitle file, or ``None`` if unusable.

    ``None`` is returned when the file does not exist, its extension is not
    in ``core.SUBTITLE_EXTENSIONS``, it is larger than
    ``_MAX_TEXT_SUBTITLE_SIZE``, it cannot be read, or it cannot be decoded.
    UTF-8 is tried first, then the encoding guessed by ``Detector``.
    """
    if not os.path.exists(subtitle_path) or \
            os.path.splitext(subtitle_path)[1] not in core.SUBTITLE_EXTENSIONS:
        return None

    try:
        if os.path.getsize(subtitle_path) > _MAX_TEXT_SUBTITLE_SIZE:
            logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
                          subtitle_path)
            return None

        with open(subtitle_path, 'rb') as f:
            raw = f.read()
    except OSError:
        logging.debug('BAZARR was unable to read this subtitles file: %r', subtitle_path)
        return None

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        pass

    try:
        guess = Detector().detect(raw)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                      "It's probably a binary file: " + subtitle_path)
        return None

    logging.debug('BAZARR detected encoding %r', guess)
    try:
        return raw.decode(guess)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
                      "guessed encoding. It's probably a binary file: " + subtitle_path)
        return None


def _guess_subtitle_language(text, subtitle_path):
    """Return a non-forced, non-HI ``Language`` guessed from ``text``, or ``None``."""
    try:
        detected_language = guess_language(text)
    except Exception:
        logging.debug('BAZARR was unable to detect the language of this subtitles file: %r', subtitle_path)
        return None

    if not detected_language:
        return None

    logging.debug("BAZARR external subtitles detected and guessed this language: " + str(detected_language))
    try:
        return Language.rebuild(Language.fromietf(detected_language), forced=False, hi=False)
    except Exception:
        # The guessed value could not be turned into a Language object.
        logging.debug('BAZARR was unable to use the guessed language %r for this subtitles file: %r',
                      detected_language, subtitle_path)
        return None


def guess_external_subtitles(dest_folder, subtitles):
    """Complete the language information of external subtitles from their content.

    :param dest_folder: folder in which the subtitle files are looked up.
    :param subtitles: dict mapping a subtitle file name to its language
        object, or to a falsy value when the language is unknown. It is
        updated in place.
    :return: the same ``subtitles`` dict.

    For each entry without a language, the language is guessed from the file
    content. Every entry that then has a language which is neither forced nor
    already hearing-impaired is flagged ``hi=True`` when its content matches
    ``hi_regex``. Files that are missing from ``dest_folder``, too large,
    unreadable or undecodable are left untouched.
    """
    for subtitle, language in subtitles.items():
        # Forced subtitles skip HI detection, and subtitles already flagged
        # as HI have nothing left to detect.
        if language and (language.forced or language.hi):
            continue

        subtitle_path = os.path.join(dest_folder, subtitle)
        text = _read_subtitle_text(subtitle_path)
        if text is None:
            continue

        if not language:
            logging.debug("BAZARR falling back to file content analysis to detect language.")
            language = _guess_subtitle_language(text, subtitle_path)
            if not language:
                # Language is still undetected: skip this file.
                continue
            subtitles[subtitle] = language

        # Detect hearing-impaired external subtitles not identified in filename
        if hi_regex.search(text):
            subtitles[subtitle] = Language.rebuild(language, forced=False, hi=True)

    return subtitles