Fix for exception management when detecting encoding of file.

This commit is contained in:
Louis Vézina 2020-10-02 12:35:12 -04:00
parent 47b3f05757
commit 5d68247c2d
1 changed file with 30 additions and 11 deletions

View File

@ -417,9 +417,21 @@ def guess_external_subtitles(dest_folder, subtitles):
detected_language = guess_language(text)
except UnicodeDecodeError:
detector = Detector()
guess = detector.detect(text)
logging.debug('BAZARR detected encoding %r', guess)
text = text.decode(guess)
try:
guess = detector.detect(text)
except:
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
"It's probably a binary file: " + subtitle_path)
continue
else:
logging.debug('BAZARR detected encoding %r', guess)
try:
text = text.decode(guess)
except:
logging.debug(
"BAZARR skipping this subtitles because we can't decode the file using the "
"guessed encoding. It's probably a binary file: " + subtitle_path)
continue
detected_language = guess_language(text)
except:
logging.debug('BAZARR was unable to detect encoding for this subtitles file: %r', subtitle_path)
@ -438,10 +450,10 @@ def guess_external_subtitles(dest_folder, subtitles):
subtitle_path = os.path.join(dest_folder, subtitle)
# to improve performance, skip detection of files larger than 1M
#if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
# logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
# subtitle_path)
# continue
if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
subtitle_path)
continue
with open(subtitle_path, 'rb') as f:
text = f.read()
@ -450,13 +462,20 @@ def guess_external_subtitles(dest_folder, subtitles):
text = text.decode('utf-8')
except UnicodeDecodeError:
detector = Detector()
guess = detector.detect(text)
logging.debug('BAZARR detected encoding %r', guess)
try:
text = text.decode(guess)
guess = detector.detect(text)
except:
# text could not be decoded with guessed encoding, we won't try to detect HI for this one
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
"It's probably a binary file: " + subtitle_path)
continue
else:
logging.debug('BAZARR detected encoding %r', guess)
try:
text = text.decode(guess)
except:
logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
"guessed encoding. It's probably a binary file: " + subtitle_path)
continue
if bool(re.search(hi_regex, text)):
subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)