# coding=utf-8 import gc import os import logging import ast import re from guess_language import guess_language from subliminal_patch import core, search_external_subtitles from subzero.language import Language from database import database, get_profiles_list, get_profile_cutoff from get_languages import alpha2_from_alpha3, language_from_alpha2, get_language_set from config import settings from helper import path_mappings, get_subtitle_destination_folder from embedded_subs_reader import embedded_subs_reader from event_handler import event_stream from charamel import Detector gc.enable() global hi_regex hi_regex = re.compile(r'[*¶♫♪].{3,}[*¶♫♪]|[\[\(\{].{3,}[\]\)\}](? 1: if subtitles[1] == 'forced': forced = True hi = False elif subtitles[1] == 'hi': forced = False hi = True actual_subtitles_list.append([lang, str(forced), str(hi)]) # check if cutoff is reached and skip any further check cutoff_met = False cutoff_temp_list = get_profile_cutoff(profile_id=episode_subtitles['profileId']) if cutoff_temp_list: for cutoff_temp in cutoff_temp_list: cutoff_language = [cutoff_temp['language'], cutoff_temp['forced'], cutoff_temp['hi']] if cutoff_language in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) elif cutoff_language and [cutoff_language[0], 'True', 'False'] in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) elif cutoff_language and [cutoff_language[0], 'False', 'True'] in actual_subtitles_list: cutoff_met = True missing_subtitles_text = str([]) if not cutoff_met: # if cutoff isn't met or None, we continue # get difference between desired and existing subtitles missing_subtitles_list = [] for item in desired_subtitles_list: if item not in actual_subtitles_list: missing_subtitles_list.append(item) # remove missing that have forced or hi subtitles for this language in existing for item in actual_subtitles_list: if item[1] == 'True' or item[2] == 'True': try: missing_subtitles_list.remove([item[0], 'False', 'False']) 
# NOTE(review): this line range started with the trailing statements of the
# preceding definition, whose beginning is not visible (truncated) in this
# view. They are preserved below as a comment so nothing is lost; re-attach
# them to that definition once its head is restored (indentation is a best
# guess, the statements themselves are verbatim):
#
#                     except ValueError:
#                         pass
#
#             # make the missing languages list looks like expected
#             missing_subtitles_output_list = []
#             for item in missing_subtitles_list:
#                 lang = item[0]
#                 if item[1] == 'True':
#                     lang += ':forced'
#                 elif item[2] == 'True':
#                     lang += ':hi'
#                 missing_subtitles_output_list.append(lang)
#
#             missing_subtitles_text = str(missing_subtitles_output_list)
#
#     database.execute("UPDATE table_episodes SET missing_subtitles=? WHERE sonarrEpisodeId=?",
#                      (missing_subtitles_text, episode_subtitles['sonarrEpisodeId']))
#
#     if send_event:
#         event_stream(type='episode', action='update', series=episode_subtitles['sonarrSeriesId'],
#                      episode=episode_subtitles['sonarrEpisodeId'])
#         event_stream(type='badges_series')


def _desired_subtitles(profile, audio_language_field):
    """Build the ``[language, forced, hi]`` entries requested by a languages profile.

    :param profile: row returned by ``get_profiles_list`` (may be falsy); its
        ``'items'`` value is a Python-literal string describing the languages.
    :param audio_language_field: Python-literal string holding the media's
        audio languages. It is only evaluated for items whose
        ``'audio_exclude'`` is ``"True"``.
    :return: list of ``[language, forced, hi]`` lists.
    """
    desired = []
    if not profile:
        return desired
    for item in ast.literal_eval(profile['items']):
        # Skip a language when the profile excludes it for media that already
        # has audio in that language.
        if item['audio_exclude'] == "True" and \
                language_from_alpha2(item['language']) in ast.literal_eval(audio_language_field):
            continue
        desired.append([item['language'], item['forced'], item['hi']])
    return desired


def _existing_subtitle_entry(code):
    """Turn a stored code (``'en'``, ``'en:forced'``, ``'en:hi'``) into ``[lang, forced, hi]``.

    ``forced`` and ``hi`` are the strings ``'True'``/``'False'`` so the result
    can be compared directly with the profile entries.
    """
    parts = code.split(':')
    modifier = parts[1] if len(parts) > 1 else None
    return [parts[0], str(modifier == 'forced'), str(modifier == 'hi')]


def _cutoff_reached(cutoff_items, existing):
    """Return True when any cutoff language is already present in ``existing``.

    A cutoff language counts as present when the exact ``[language, forced,
    hi]`` entry exists, or when a forced-only or hi-only entry exists for the
    same language.
    """
    for cutoff in cutoff_items or ():
        language = cutoff['language']
        variants = (
            [language, cutoff['forced'], cutoff['hi']],
            [language, 'True', 'False'],
            [language, 'False', 'True'],
        )
        if any(variant in existing for variant in variants):
            return True
    return False


def _missing_subtitles_text(desired, existing):
    """Return the ``str()`` of the list of language codes that are desired but not present."""
    missing = [item for item in desired if item not in existing]

    # A forced or hi subtitle in a language also satisfies the plain request
    # for that language.
    for lang, forced, hi in existing:
        if forced == 'True' or hi == 'True':
            plain = [lang, 'False', 'False']
            if plain in missing:
                missing.remove(plain)

    # Convert back to the 'lang', 'lang:forced', 'lang:hi' notation.
    output = []
    for lang, forced, hi in missing:
        if forced == 'True':
            lang += ':forced'
        elif hi == 'True':
            lang += ':hi'
        output.append(lang)
    return str(output)


def list_missing_subtitles_movies(no=None, epno=None, send_event=True):
    """Recompute and store the ``missing_subtitles`` column of ``table_movies``.

    :param no: ``radarrId`` of a single movie to process; ``None`` processes
        every row of ``table_movies``.
    :param epno: unused by this function; kept so existing callers keep working.
    :param send_event: when true, emit the ``movie`` update and
        ``badges_movies`` events after each movie is updated.
    :return: ``None``. Returns early (after logging an error) if the database
        layer hands back a string instead of rows.
    """
    query = "SELECT radarrId, subtitles, profileId, audio_language FROM table_movies"
    if no is not None:
        # Bind the id as a parameter (as the other queries in this file do)
        # instead of concatenating it into the SQL text.
        movies_subtitles = database.execute(query + " WHERE radarrId=?", (no,))
    else:
        movies_subtitles = database.execute(query)

    if isinstance(movies_subtitles, str):
        logging.error("BAZARR list missing subtitles query to DB returned this instead of rows: " + movies_subtitles)
        return

    use_embedded_subs = settings.general.getboolean('use_embedded_subs')

    for movie_subtitles in movies_subtitles:
        # Movies without a languages profile are stored with an empty list.
        missing_subtitles_text = '[]'

        if movie_subtitles['profileId']:
            # get desired subtitles
            desired_subtitles_list = _desired_subtitles(
                get_profiles_list(profile_id=movie_subtitles['profileId']),
                movie_subtitles['audio_language'])

            # get existing subtitles
            actual_subtitles_list = []
            if movie_subtitles['subtitles'] is not None:
                stored = ast.literal_eval(movie_subtitles['subtitles'])
                if not use_embedded_subs:
                    # Keep only entries whose second field is truthy
                    # (presumably the path of an external file; embedded
                    # tracks have none - TODO confirm).
                    stored = [x for x in stored if x[1]]
                actual_subtitles_list = [_existing_subtitle_entry(x[0]) for x in stored]

            # check if cutoff is reached and skip any further check
            cutoff_temp_list = get_profile_cutoff(profile_id=movie_subtitles['profileId'])
            if not _cutoff_reached(cutoff_temp_list, actual_subtitles_list):
                missing_subtitles_text = _missing_subtitles_text(desired_subtitles_list, actual_subtitles_list)

        database.execute("UPDATE table_movies SET missing_subtitles=? WHERE radarrId=?",
                         (missing_subtitles_text, movie_subtitles['radarrId']))

        if send_event:
            event_stream(type='movie', action='update', movie=movie_subtitles['radarrId'])
            event_stream(type='badges_movies')


def series_full_scan_subtitles():
    """Run ``store_subtitles`` on every row of ``table_episodes``, then force a garbage collection."""
    episodes = database.execute("SELECT path FROM table_episodes")

    for episode in episodes:
        store_subtitles(episode['path'], path_mappings.path_replace(episode['path']))

    gc.collect()


def movies_full_scan_subtitles():
    """Run ``store_subtitles_movie`` on every row of ``table_movies``, then force a garbage collection."""
    movies = database.execute("SELECT path FROM table_movies")

    for movie in movies:
        store_subtitles_movie(movie['path'], path_mappings.path_replace_movie(movie['path']))

    gc.collect()


def series_scan_subtitles(no):
    """Run ``store_subtitles`` on every episode of the series whose ``sonarrSeriesId`` is ``no``."""
    episodes = database.execute("SELECT path FROM table_episodes WHERE sonarrSeriesId=? ORDER BY sonarrEpisodeId",
                                (no,))

    for episode in episodes:
        store_subtitles(episode['path'], path_mappings.path_replace(episode['path']))


def movies_scan_subtitles(no):
    """Run ``store_subtitles_movie`` on the movie whose ``radarrId`` is ``no``."""
    movies = database.execute("SELECT path FROM table_movies WHERE radarrId=? ORDER BY radarrId", (no,))

    for movie in movies:
        store_subtitles_movie(movie['path'], path_mappings.path_replace_movie(movie['path']))


def get_external_subtitles_path(file, subtitle):
    """Locate an external subtitle file according to the ``subfolder`` setting.

    :param file: path of the video file; its directory is the base folder.
    :param subtitle: file name of the subtitle.
    :return: for ``"current"``, the path next to the video (existence is not
        checked); for ``"absolute"``/``"relative"``, the first existing path
        among the video folder and the custom folder, else ``None``; ``None``
        for any other setting value.
    """
    fld = os.path.dirname(file)
    beside_video = os.path.join(fld, subtitle)
    mode = settings.general.subfolder

    if mode == "current":
        return beside_video

    if mode == "absolute":
        custom_fld = settings.general.subfolder_custom
    elif mode == "relative":
        custom_fld = os.path.join(fld, settings.general.subfolder_custom)
    else:
        return None

    if os.path.exists(beside_video):
        return beside_video
    # Only build the custom-folder path when it is actually needed.
    in_custom_folder = os.path.join(custom_fld, subtitle)
    if os.path.exists(in_custom_folder):
        return in_custom_folder
    return None


# Files above this size are assumed not to be text-based subtitles.
_MAX_TEXT_SUBTITLE_SIZE = 1 * 1024 * 1024


def _is_subtitle_file(subtitle_path):
    """Return True if the path exists and has one of ``core.SUBTITLE_EXTENSIONS``."""
    return os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS


def _read_subtitle_text(subtitle_path):
    """Read and decode a subtitle file, or return ``None`` when it must be skipped.

    UTF-8 is tried first; otherwise the encoding guessed by ``Detector`` is
    used. Files larger than 1M, or that cannot be decoded, yield ``None``
    (with a debug log explaining why).
    """
    # to improve performance, skip detection of files larger that 1M
    if os.path.getsize(subtitle_path) > _MAX_TEXT_SUBTITLE_SIZE:
        logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " + subtitle_path)
        return None

    with open(subtitle_path, 'rb') as f:
        raw = f.read()

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        pass

    try:
        guess = Detector().detect(raw)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                      "It's probably a binary file: " + subtitle_path)
        return None

    logging.debug('BAZARR detected encoding %r', guess)
    try:
        return raw.decode(guess)
    except Exception:
        logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
                      "guessed encoding. It's probably a binary file: " + subtitle_path)
        return None


def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing language / hearing-impaired information from file content.

    :param dest_folder: folder that contains the subtitle files.
    :param subtitles: dict mapping subtitle file name to a ``Language`` (or a
        falsy value when the language is unknown). It is updated in place.
    :return: the same ``subtitles`` dict.

    For each entry without a language, the file content is analysed with
    ``guess_language``. Every entry that then has a non-forced, non-hi
    language is checked against ``hi_regex`` and flagged ``hi=True`` on match.
    """
    # Only values of existing keys are reassigned below, which is safe while
    # iterating over items().
    for subtitle, language in subtitles.items():
        subtitle_path = os.path.join(dest_folder, subtitle)
        text = None

        if not language:
            if not _is_subtitle_file(subtitle_path):
                continue
            logging.debug("BAZARR falling back to file content analysis to detect language.")

            text = _read_subtitle_text(subtitle_path)
            if text is None:
                continue

            detected_language = None
            try:
                detected_language = guess_language(text)
            except Exception:
                logging.debug('BAZARR was unable to detect encoding for this subtitles file: %r', subtitle_path)

            # NOTE(review): the original code tried to distinguish simplified
            # from traditional Chinese here when 'zh' was detected, looking
            # for one of [".chs", ".sc", ".zhs", ".hans", ".gb", u"简", u"双语"]
            # in the path, but it used `==` (comparison) instead of `=`, so
            # the result was never changed. That effective behaviour is kept:
            # 'zh' stays 'zh'. Assigning 'zt' requires confirming that
            # Language.fromietf accepts it, otherwise those files would end
            # up with no language at all.

            if detected_language:
                logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
                    detected_language))
                try:
                    subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=False,
                                                           hi=False)
                except Exception:
                    # Best effort: an unusable guess leaves the entry undetected.
                    logging.debug('BAZARR could not build a language from %r for this subtitles file: %r',
                                  detected_language, subtitle_path)

        # Look at the *current* value, not the loop variable, so a language
        # that was just guessed above also goes through HI detection.
        current = subtitles[subtitle]

        # If language is still None (undetected), skip it.
        # Skip HI detection if forced, or if already identified as HI.
        if not current or current.forced or current.hi:
            continue

        # Detect hearing-impaired external subtitles not identified in filename
        if text is None:
            if not _is_subtitle_file(subtitle_path):
                continue
            text = _read_subtitle_text(subtitle_path)
            if text is None:
                continue

        if hi_regex.search(text):
            subtitles[subtitle] = Language.rebuild(current, forced=False, hi=True)

    return subtitles