# coding=utf-8 import gc import os import logging import ast import re from guess_language import guess_language from subliminal_patch import core, search_external_subtitles from subzero.language import Language from custom_lang import CustomLanguage from database import get_profiles_list, get_profile_cutoff, TableEpisodes, TableShows, TableMovies from get_languages import alpha2_from_alpha3, language_from_alpha2, get_language_set from config import settings from helper import path_mappings, get_subtitle_destination_folder from embedded_subs_reader import embedded_subs_reader from event_handler import event_stream, show_progress, hide_progress from charamel import Detector gc.enable() global hi_regex hi_regex = re.compile(r'[*¶♫♪].{3,}[*¶♫♪]|[\[\(\{].{3,}[\]\)\}](? 1: if subtitles[1] == 'forced': forced = True hi = False elif subtitles[1] == 'hi': forced = False hi = True actual_subtitles_list.append([lang, str(forced), str(hi)]) # check if cutoff is reached and skip any further check cutoff_met = False cutoff_temp_list = get_profile_cutoff(profile_id=episode_subtitles['profileId']) if cutoff_temp_list: for cutoff_temp in cutoff_temp_list: cutoff_language = [cutoff_temp['language'], cutoff_temp['forced'], cutoff_temp['hi']] if cutoff_language in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) elif cutoff_language and [cutoff_language[0], 'True', 'False'] in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) elif cutoff_language and [cutoff_language[0], 'False', 'True'] in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) if not cutoff_met: # if cutoff isn't met or None, we continue # get difference between desired and existing subtitles missing_subtitles_list = [] for item in desired_subtitles_list: if item not in actual_subtitles_list: missing_subtitles_list.append(item) # remove missing that have hi subtitles for this language in existing for item in actual_subtitles_list: if item[2] == 
def list_missing_subtitles_movies(no=None, send_event=True):
    """Recompute and persist the ``missing_subtitles`` column of movies.

    For each selected movie the languages requested by its profile are
    compared with the subtitles already recorded for it; the difference is
    written back to ``TableMovies.missing_subtitles`` as the ``str()`` of a
    list of ``lang``, ``lang:forced`` or ``lang:hi`` labels.

    :param no: radarrId of a single movie to process; every movie is
        processed when this is falsy.
    :param send_event: when true, emit a ``movie`` event and a ``badges``
        event after each movie has been updated.
    """
    query = TableMovies.select(TableMovies.radarrId,
                               TableMovies.subtitles,
                               TableMovies.profileId,
                               TableMovies.audio_language)
    movies_subtitles = query.where((TableMovies.radarrId == no) if no else None).dicts()
    if isinstance(movies_subtitles, str):
        logging.error("BAZARR list missing subtitles query to DB returned this instead of rows: " + movies_subtitles)
        return

    embedded_allowed = settings.general.getboolean('use_embedded_subs')

    for movie in movies_subtitles:
        profile_id = movie['profileId']
        missing_subtitles_text = '[]'

        if profile_id:
            # --- languages requested by the profile ---------------------
            profile = get_profiles_list(profile_id=profile_id)
            wanted = []
            if profile:
                for language in profile['items']:
                    if language['audio_exclude'] == "True":
                        cutoff_lang_temp = get_profile_cutoff(profile_id=profile_id)
                        # cutoff language matches the audio track: nothing is wanted at all
                        if cutoff_lang_temp and language_from_alpha2(
                                cutoff_lang_temp[0]['language']) in ast.literal_eval(movie['audio_language']):
                            wanted = []
                            break
                        # this language matches the audio track: skip only this one
                        if language_from_alpha2(language['language']) in ast.literal_eval(
                                movie['audio_language']):
                            continue
                    wanted.append([language['language'], language['forced'], language['hi']])

            # --- subtitles already recorded for the movie ---------------
            present = []
            if movie['subtitles'] is not None:
                stored = ast.literal_eval(movie['subtitles'])
                if not embedded_allowed:
                    # keep only entries whose second field is truthy
                    stored = [entry for entry in stored if entry[1]]
                for entry in stored:
                    parts = entry[0].split(':')
                    tag = parts[1] if len(parts) > 1 else None
                    present.append([parts[0], str(tag == 'forced'), str(tag == 'hi')])

            # --- cutoff: any variant of a cutoff language ends the search
            cutoff_met = False
            cutoff_entries = get_profile_cutoff(profile_id=profile_id)
            if cutoff_entries:
                for cutoff in cutoff_entries:
                    code = cutoff['language']
                    exact = [code, cutoff['forced'], cutoff['hi']]
                    if (exact in present
                            or [code, 'True', 'False'] in present
                            or [code, 'False', 'True'] in present):
                        cutoff_met = True
                        missing_subtitles_text = str([])

            if not cutoff_met:
                # difference between requested and recorded subtitles
                missing = [item for item in wanted if item not in present]

                # a recorded forced or hi subtitle also covers the plain
                # request for the same language
                for item in present:
                    if 'True' in (item[1], item[2]):
                        plain = [item[0], 'False', 'False']
                        if plain in missing:
                            missing.remove(plain)

                # render each remaining entry in its stored textual form
                labels = []
                for code, forced, hi in missing:
                    if forced == 'True':
                        labels.append(code + ':forced')
                    elif hi == 'True':
                        labels.append(code + ':hi')
                    else:
                        labels.append(code)

                missing_subtitles_text = str(labels)

        TableMovies.update({TableMovies.missing_subtitles: missing_subtitles_text})\
            .where(TableMovies.radarrId == movie['radarrId'])\
            .execute()

        if send_event:
            event_stream(type='movie', payload=movie['radarrId'])
            event_stream(type='badges')
def series_full_scan_subtitles():
    """Index subtitles for every episode path in ``TableEpisodes``.

    Progress is reported under the ``episodes_disk_scan`` id while the scan
    runs and hidden afterwards; a garbage collection is forced at the end.
    """
    cache_enabled = settings.sonarr.getboolean('use_ffprobe_cache')
    rows = TableEpisodes.select(TableEpisodes.path).dicts()
    total = len(rows)

    for position, row in enumerate(rows):
        show_progress(id='episodes_disk_scan',
                      header='Full disk scan...',
                      name='Episodes subtitles',
                      value=position,
                      count=total)
        stored_path = row['path']
        store_subtitles(stored_path, path_mappings.path_replace(stored_path), use_cache=cache_enabled)

    hide_progress(id='episodes_disk_scan')
    gc.collect()


def movies_full_scan_subtitles():
    """Index subtitles for every movie path in ``TableMovies``.

    Progress is reported under the ``movies_disk_scan`` id while the scan
    runs and hidden afterwards; a garbage collection is forced at the end.
    """
    cache_enabled = settings.radarr.getboolean('use_ffprobe_cache')
    rows = TableMovies.select(TableMovies.path).dicts()
    total = len(rows)

    for position, row in enumerate(rows):
        show_progress(id='movies_disk_scan',
                      header='Full disk scan...',
                      name='Movies subtitles',
                      value=position,
                      count=total)
        stored_path = row['path']
        store_subtitles_movie(stored_path, path_mappings.path_replace_movie(stored_path),
                              use_cache=cache_enabled)

    hide_progress(id='movies_disk_scan')
    gc.collect()


def series_scan_subtitles(no):
    """Index subtitles for the episodes of one series, without the ffprobe cache.

    :param no: sonarrSeriesId of the series whose episodes are scanned.
    """
    rows = TableEpisodes.select(TableEpisodes.path)\
        .where(TableEpisodes.sonarrSeriesId == no)\
        .order_by(TableEpisodes.sonarrEpisodeId)\
        .dicts()

    for row in rows:
        stored_path = row['path']
        store_subtitles(stored_path, path_mappings.path_replace(stored_path), use_cache=False)


def movies_scan_subtitles(no):
    """Index subtitles for one movie, without the ffprobe cache.

    :param no: radarrId of the movie to scan.
    """
    rows = TableMovies.select(TableMovies.path)\
        .where(TableMovies.radarrId == no)\
        .order_by(TableMovies.radarrId)\
        .dicts()

    for row in rows:
        stored_path = row['path']
        store_subtitles_movie(stored_path, path_mappings.path_replace_movie(stored_path), use_cache=False)
def get_external_subtitles_path(file, subtitle):
    """Return the path of an external subtitle for a video file, or ``None``.

    The lookup depends on ``settings.general.subfolder``:

    * ``"current"``: the subtitle path next to the video is returned without
      checking that it exists.
    * ``"absolute"``: the video's folder is tried first, then
      ``settings.general.subfolder_custom``; the first existing path wins.
    * ``"relative"``: same, with ``subfolder_custom`` joined to the video's
      folder.
    * any other value: ``None``.

    :param file: path of the video file.
    :param subtitle: file name of the subtitle.
    :return: the resolved subtitle path, or ``None`` when nothing matches.
    """
    video_folder = os.path.dirname(file)
    mode = settings.general.subfolder

    if mode not in ("current", "absolute", "relative"):
        return None

    beside_video = os.path.join(video_folder, subtitle)
    if mode == "current":
        return beside_video

    if mode == "absolute":
        custom_folder = settings.general.subfolder_custom
    else:
        custom_folder = os.path.join(video_folder, settings.general.subfolder_custom)

    # the video's own folder takes precedence over the custom folder
    if os.path.exists(beside_video):
        return beside_video

    in_custom_folder = os.path.join(custom_folder, subtitle)
    return in_custom_folder if os.path.exists(in_custom_folder) else None
# Files larger than this are assumed not to be text subtitles and are skipped.
_MAX_TEXT_SUBTITLE_SIZE = 1 * 1024 * 1024

# File-name hints used to tell Traditional Chinese apart from generic 'zh'.
_TRADITIONAL_CHINESE_SUFFIXES = (".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
                                 ".hant", ".big5", ".traditional")
_TRADITIONAL_CHINESE_MARKERS = (u"繁", u"雙語")


def _is_subtitle_file(subtitle_path):
    """Return True when the path exists and has a known subtitle extension."""
    return os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS


def _read_subtitle_text(subtitle_path):
    """Return the decoded content of a subtitle file, or None when it must be skipped.

    UTF-8 is tried first; otherwise the encoding guessed by ``Detector`` is
    used. Oversized or undecodable files are logged and skipped.
    """
    if os.path.getsize(subtitle_path) > _MAX_TEXT_SUBTITLE_SIZE:
        logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
                      subtitle_path)
        return None

    with open(subtitle_path, 'rb') as f:
        raw = f.read()

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        pass  # not UTF-8: fall back to encoding detection below

    try:
        guess = Detector().detect(raw)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                      "It's probably a binary file: " + subtitle_path)
        return None

    logging.debug('BAZARR detected encoding %r', guess)
    try:
        return raw.decode(guess)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
                      "guessed encoding. It's probably a binary file: " + subtitle_path)
        return None


def _looks_traditional_chinese(subtitle):
    """Return True when the subtitle file name hints at Traditional Chinese."""
    stem = str(os.path.splitext(subtitle)[0]).lower()
    if stem.endswith(_TRADITIONAL_CHINESE_SUFFIXES):
        return True
    # NOTE(review): the fuzzy markers are matched anywhere in the file name;
    # confirm that directory names are not meant to be considered as well.
    return any(marker in stem for marker in _TRADITIONAL_CHINESE_MARKERS)


def _language_from_text(subtitle, subtitle_path, text):
    """Guess the language of decoded subtitle text; return a Language or None."""
    try:
        detected = guess_language(text)
    except Exception:
        logging.debug('BAZARR was unable to detect encoding for this subtitles file: %r', subtitle_path)
        return None
    if not detected:
        return None

    candidates = [detected]
    if detected == 'zh' and _looks_traditional_chinese(subtitle):
        # Prefer Traditional Chinese, but keep plain 'zh' as a fallback in
        # case the 'zt' code cannot be turned into a Language.
        candidates.insert(0, 'zt')

    logging.debug("BAZARR external subtitles detected and guessed this language: " + str(candidates[0]))
    for code in candidates:
        try:
            return Language.rebuild(Language.fromietf(code), forced=False, hi=False)
        except Exception:
            logging.debug("BAZARR could not build a language from guessed code %r for: %s",
                          code, subtitle_path)
    return None


def guess_external_subtitles(dest_folder, subtitles):
    """Fill in language and hearing-impaired flags from subtitle file content.

    For every entry whose language is falsy, the file is read and its
    language guessed from the text; a guess of ``zh`` becomes ``zt`` when the
    file name hints at Traditional Chinese. Every entry with a language that
    is neither forced nor already hearing-impaired is then searched with
    ``hi_regex`` and rebuilt with ``hi=True`` on a match.

    Files that do not exist, lack a known subtitle extension, exceed 1 MiB
    or cannot be decoded are left untouched.

    :param dest_folder: folder that contains the subtitle files.
    :param subtitles: dict mapping subtitle file name to a Language (or a
        falsy value when the language is unknown). It is updated in place.
    :return: the same ``subtitles`` dict.
    """
    for subtitle, language in subtitles.items():
        subtitle_path = os.path.join(dest_folder, subtitle)
        text = None  # decoded content, shared by both passes so the file is read once

        if not language:
            if not _is_subtitle_file(subtitle_path):
                continue
            logging.debug("BAZARR falling back to file content analysis to detect language.")
            text = _read_subtitle_text(subtitle_path)
            if text is None:
                continue
            # Refresh the local value so the hearing-impaired pass below sees
            # the language that was just detected.
            language = _language_from_text(subtitle, subtitle_path, text)
            if not language:
                continue
            subtitles[subtitle] = language

        # Forced subtitles are never flagged HI; already-HI ones need no check.
        if language.forced or language.hi:
            continue

        if text is None:
            if not _is_subtitle_file(subtitle_path):
                continue
            text = _read_subtitle_text(subtitle_path)
            if text is None:
                continue

        if re.search(hi_regex, text):
            subtitles[subtitle] = Language.rebuild(language, forced=False, hi=True)

    return subtitles