Added more debug logging for the existing subtitles indexing process.

This commit is contained in:
morpheus65535 2018-12-04 20:30:03 -05:00
parent 552cae7483
commit b0e6136de1
1 changed file with 83 additions and 46 deletions

View File

@ -18,10 +18,11 @@ from get_languages import alpha2_from_alpha3
gc.enable() gc.enable()
def store_subtitles(file): def store_subtitles(file):
# languages = [] logging.debug('BAZARR started subtitles indexing for this file: ' + file)
actual_subtitles = [] actual_subtitles = []
if os.path.exists(file): if os.path.exists(file):
if os.path.splitext(file)[1] == '.mkv': if os.path.splitext(file)[1] == '.mkv':
logging.debug("BAZARR is trying to index embedded subtitles.")
try: try:
with open(file, 'rb') as f: with open(file, 'rb') as f:
mkv = enzyme.MKV(f) mkv = enzyme.MKV(f)
@ -29,54 +30,71 @@ def store_subtitles(file):
for subtitle_track in mkv.subtitle_tracks: for subtitle_track in mkv.subtitle_tracks:
try: try:
if alpha2_from_alpha3(subtitle_track.language) != None: if alpha2_from_alpha3(subtitle_track.language) != None:
actual_subtitles.append([str(alpha2_from_alpha3(subtitle_track.language)),None]) lang = str(alpha2_from_alpha3(subtitle_track.language))
logging.debug("BAZARR embedded subtitles detected: " + lang)
actual_subtitles.append([lang,None])
except: except:
logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_track.language)
pass pass
except: except Exception as e:
logging.exception("BAZARR error when trying to analyze this mkv file: " + file)
pass pass
else:
logging.debug("BAZARR This file isn't an .mkv file.")
brazilian_portuguese = [".pt-br", ".pob", "pb"] brazilian_portuguese = [".pt-br", ".pob", "pb"]
try: try:
subtitles = core.search_external_subtitles(file) subtitles = core.search_external_subtitles(file)
except: except Exception as e:
logging.exception("BAZARR unable to index external subtitles.")
pass pass
else: else:
for subtitle, language in subtitles.iteritems(): for subtitle, language in subtitles.iteritems():
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)) is True: if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)) is True:
logging.debug("BAZARR external subtitles detected: " + "pb")
actual_subtitles.append([str("pb"), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))]) actual_subtitles.append([str("pb"), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))])
elif str(language) != 'und': elif str(language) != 'und':
logging.debug("BAZARR external subtitles detected: " + str(language))
actual_subtitles.append([str(language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))]) actual_subtitles.append([str(language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))])
else: else:
with open(path_replace(os.path.join(os.path.dirname(file), subtitle)), 'r') as f: if os.path.splitext(subtitle)[1] != ".sub":
text = list(islice(f, 100)) logging.debug("BAZARR falling back to file content analysis to detect language.")
text = ' '.join(text) with open(path_replace(os.path.join(os.path.dirname(file), subtitle)), 'r') as f:
encoding = UnicodeDammit(text) text = list(islice(f, 100))
try: text = ' '.join(text)
text = text.decode(encoding.original_encoding) encoding = UnicodeDammit(text)
detected_language = langdetect.detect(text) try:
except Exception as e: text = text.decode(encoding.original_encoding)
logging.exception('BAZARR Error trying to detect character encoding for this subtitles file: ' + path_replace(os.path.join(os.path.dirname(file), subtitle)) + ' You should try to delete this subtitles file manually and ask Bazarr to download it again.') detected_language = langdetect.detect(text)
else: except Exception as e:
if len(detected_language) > 0: logging.exception('BAZARR Error trying to detect language for this subtitles file: ' + path_replace(os.path.join(os.path.dirname(file), subtitle)) + ' You should try to delete this subtitles file manually and ask Bazarr to download it again.')
actual_subtitles.append([str(detected_language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))]) else:
if len(detected_language) > 0:
logging.debug("BAZARR external subtitles detected and analysis guessed this language: " + str(detected_language))
actual_subtitles.append([str(detected_language), path_replace_reverse(os.path.join(os.path.dirname(file), subtitle))])
conn_db = sqlite3.connect(os.path.join(config_dir, 'db/bazarr.db'), timeout=30) conn_db = sqlite3.connect(os.path.join(config_dir, 'db/bazarr.db'), timeout=30)
c_db = conn_db.cursor() c_db = conn_db.cursor()
logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
c_db.execute("UPDATE table_episodes SET subtitles = ? WHERE path = ?", (str(actual_subtitles), path_replace_reverse(file)))
conn_db.commit()
c_db.execute("UPDATE table_episodes SET subtitles = ? WHERE path = ?", (str(actual_subtitles), path_replace_reverse(file))) c_db.close()
conn_db.commit() else:
logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")
c_db.close() logging.debug('BAZARR ended subtitles indexing for this file: ' + file)
return actual_subtitles return actual_subtitles
def store_subtitles_movie(file): def store_subtitles_movie(file):
# languages = [] logging.debug('BAZARR started subtitles indexing for this file: ' + file)
actual_subtitles = [] actual_subtitles = []
if os.path.exists(file): if os.path.exists(file):
if os.path.splitext(file)[1] == '.mkv': if os.path.splitext(file)[1] == '.mkv':
logging.debug("BAZARR is trying to index embedded subtitles.")
try: try:
with open(file, 'rb') as f: with open(file, 'rb') as f:
mkv = enzyme.MKV(f) mkv = enzyme.MKV(f)
@ -84,42 +102,61 @@ def store_subtitles_movie(file):
for subtitle_track in mkv.subtitle_tracks: for subtitle_track in mkv.subtitle_tracks:
try: try:
if alpha2_from_alpha3(subtitle_track.language) != None: if alpha2_from_alpha3(subtitle_track.language) != None:
actual_subtitles.append([str(alpha2_from_alpha3(subtitle_track.language)), None]) lang = str(alpha2_from_alpha3(subtitle_track.language))
logging.debug("BAZARR embedded subtitles detected: " + lang)
actual_subtitles.append([lang, None])
except: except:
logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_track.language)
pass pass
except: except Exception as e:
logging.exception("BAZARR error when trying to analyze this mkv file: " + file)
pass pass
else:
logging.debug("BAZARR This file isn't an .mkv file.")
subtitles = core.search_external_subtitles(file)
brazilian_portuguese = [".pt-br", ".pob", "pb"] brazilian_portuguese = [".pt-br", ".pob", "pb"]
try:
for subtitle, language in subtitles.iteritems(): subtitles = core.search_external_subtitles(file)
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)) is True: except Exception as e:
actual_subtitles.append([str("pb"), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))]) logging.exception("BAZARR unable to index external subtitles.")
elif str(language) != 'und': pass
actual_subtitles.append([str(language), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))]) else:
else: for subtitle, language in subtitles.iteritems():
if os.path.splitext(subtitle)[1] != ".sub": if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)) is True:
with open(path_replace_movie(os.path.join(os.path.dirname(file), subtitle)), 'r') as f: logging.debug("BAZARR external subtitles detected: " + "pb")
text = list(islice(f, 100)) actual_subtitles.append([str("pb"), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))])
text = ' '.join(text) elif str(language) != 'und':
encoding = UnicodeDammit(text) logging.debug("BAZARR external subtitles detected: " + str(language))
try: actual_subtitles.append([str(language), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))])
text = text.decode(encoding.original_encoding) else:
detected_language = langdetect.detect(text) if os.path.splitext(subtitle)[1] != ".sub":
except Exception as e: logging.debug("BAZARR falling back to file content analysis to detect language.")
logging.exception('BAZARR Error trying to detect character encoding for this subtitles file: ' + path_replace_movie(os.path.join(os.path.dirname(file), subtitle)) + ' You should try to delete this subtitles file manually and ask Bazarr to download it again.') with open(path_replace_movie(os.path.join(os.path.dirname(file), subtitle)), 'r') as f:
else: text = list(islice(f, 100))
if len(detected_language) > 0: text = ' '.join(text)
actual_subtitles.append([str(detected_language), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))]) encoding = UnicodeDammit(text)
try:
text = text.decode(encoding.original_encoding)
detected_language = langdetect.detect(text)
except Exception as e:
logging.exception('BAZARR Error trying to detect language for this subtitles file: ' + path_replace(os.path.join(os.path.dirname(file), subtitle)) + ' You should try to delete this subtitles file manually and ask Bazarr to download it again.')
else:
if len(detected_language) > 0:
logging.debug("BAZARR external subtitles detected and analysis guessed this language: " + str(detected_language))
actual_subtitles.append([str(detected_language), path_replace_reverse_movie(os.path.join(os.path.dirname(file), subtitle))])
conn_db = sqlite3.connect(os.path.join(config_dir, 'db/bazarr.db'), timeout=30) conn_db = sqlite3.connect(os.path.join(config_dir, 'db/bazarr.db'), timeout=30)
c_db = conn_db.cursor() c_db = conn_db.cursor()
logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles))
c_db.execute("UPDATE table_movies SET subtitles = ? WHERE path = ?", (str(actual_subtitles), path_replace_reverse_movie(file))) c_db.execute("UPDATE table_movies SET subtitles = ? WHERE path = ?", (str(actual_subtitles), path_replace_reverse_movie(file)))
conn_db.commit() conn_db.commit()
c_db.close() c_db.close()
else:
logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.")
logging.debug('BAZARR ended subtitles indexing for this file: ' + file)
return actual_subtitles return actual_subtitles