no log: added failsafe to encoding detection

2023-06-23 10:06:46 -04:00 · 2023-06-23 10:06:46 -04:00 · f371d0585b
parent edfbb1a5ca
commit f371d0585b
1 changed files with 20 additions and 20 deletions
--- a/bazarr/subtitles/indexer/utils.py
+++ b/bazarr/subtitles/indexer/utils.py
@ -76,25 +76,26 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
                with open(subtitle_path, 'rb') as f:
                    text = f.read()

-                encoding = detect(text)['encoding']
-                if not encoding:
+                encoding = detect(text)
+                if encoding and 'encoding' in encoding:
+                    encoding = detect(text)['encoding']
+                else:
                    logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                  "It's probably a binary file: " + subtitle_path)
                    continue
-                if 'UTF' in encoding:
-                    text = text.decode('utf-8')
-                    detected_language = guess_language(text)
-                    # add simplified and traditional chinese detection
-                    if detected_language == 'zh':
-                        traditional_chinese_fuzzy = [u"繁", u"雙語"]
-                        traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
-                                               ".hant", ".big5", ".traditional"]
-                        if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
-                            detected_language == 'zt'
-                else:
-                    text = text.decode(encoding)
+                text = text.decode(encoding)

                detected_language = guess_language(text)
+
+                # add simplified and traditional chinese detection
+                if detected_language == 'zh':
+                    traditional_chinese_fuzzy = [u"繁", u"雙語"]
+                    traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
+                                           ".hant", ".big5", ".traditional"]
+                    if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or \
+                            (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
+                        detected_language = 'zt'
+
                if detected_language:
                    logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
                        detected_language))
@ -127,15 +128,14 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
                with open(subtitle_path, 'rb') as f:
                    text = f.read()

-                encoding = detect(text)['encoding']
-                if not encoding:
+                encoding = detect(text)
+                if encoding and 'encoding' in encoding:
+                    encoding = detect(text)['encoding']
+                else:
                    logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                  "It's probably a binary file: " + subtitle_path)
                    continue
-                if 'UTF' in encoding:
-                    text = text.decode('utf-8')
-                else:
-                    text = text.decode(encoding)
+                text = text.decode(encoding)

                if bool(re.search(hi_regex, text)):
                    subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)