mirror of https://github.com/morpheus65535/bazarr
164 lines
7.6 KiB
Python
164 lines
7.6 KiB
Python
# coding=utf-8
|
|
|
|
import os
|
|
import logging
|
|
import re
|
|
|
|
from guess_language import guess_language
|
|
from subliminal_patch import core
|
|
from subzero.language import Language
|
|
from charamel import Detector
|
|
|
|
from app.config import settings
|
|
from constants import hi_regex
|
|
from utilities.path_mappings import path_mappings
|
|
|
|
|
|
def get_external_subtitles_path(file, subtitle):
|
|
fld = os.path.dirname(file)
|
|
|
|
if settings.general.subfolder == "current":
|
|
path = os.path.join(fld, subtitle)
|
|
elif settings.general.subfolder == "absolute":
|
|
custom_fld = settings.general.subfolder_custom
|
|
if os.path.exists(os.path.join(fld, subtitle)):
|
|
path = os.path.join(fld, subtitle)
|
|
elif os.path.exists(os.path.join(custom_fld, subtitle)):
|
|
path = os.path.join(custom_fld, subtitle)
|
|
else:
|
|
path = None
|
|
elif settings.general.subfolder == "relative":
|
|
custom_fld = os.path.join(fld, settings.general.subfolder_custom)
|
|
if os.path.exists(os.path.join(fld, subtitle)):
|
|
path = os.path.join(fld, subtitle)
|
|
elif os.path.exists(os.path.join(custom_fld, subtitle)):
|
|
path = os.path.join(custom_fld, subtitle)
|
|
else:
|
|
path = None
|
|
else:
|
|
path = None
|
|
|
|
return path
|
|
|
|
|
|
def guess_external_subtitles(dest_folder, subtitles, media_type, previously_indexed_subtitles_to_exclude=None):
|
|
for subtitle, language in subtitles.items():
|
|
subtitle_path = os.path.join(dest_folder, subtitle)
|
|
reversed_subtitle_path = path_mappings.path_replace_reverse(subtitle_path) if media_type == "series" \
|
|
else path_mappings.path_replace_reverse_movie(subtitle_path)
|
|
|
|
if previously_indexed_subtitles_to_exclude:
|
|
x_found_lang = None
|
|
for x_lang, x_path, x_size in previously_indexed_subtitles_to_exclude:
|
|
if x_path == reversed_subtitle_path and x_size == os.stat(subtitle_path).st_size:
|
|
x_found_lang = x_lang
|
|
break
|
|
if x_found_lang:
|
|
if not language:
|
|
x_hi = ':hi' in x_found_lang
|
|
subtitles[subtitle] = Language.rebuild(Language.fromietf(x_found_lang), hi=x_hi)
|
|
continue
|
|
|
|
if not language:
|
|
if os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
detected_language = None
|
|
|
|
# detect forced subtitles
|
|
forced = True if os.path.splitext(os.path.splitext(subtitle)[0])[1] == '.forced' else False
|
|
|
|
# to improve performance, skip detection of files larger that 1M
|
|
if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
|
|
logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
|
|
subtitle_path)
|
|
continue
|
|
|
|
with open(subtitle_path, 'rb') as f:
|
|
text = f.read()
|
|
|
|
try:
|
|
text = text.decode('utf-8')
|
|
detected_language = guess_language(text)
|
|
# add simplified and traditional chinese detection
|
|
if detected_language == 'zh':
|
|
traditional_chinese_fuzzy = [u"繁", u"雙語"]
|
|
traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
|
|
".hant", ".big5", ".traditional"]
|
|
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
|
|
detected_language == 'zt'
|
|
except UnicodeDecodeError:
|
|
detector = Detector()
|
|
try:
|
|
guess = detector.detect(text)
|
|
except Exception:
|
|
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
|
|
"It's probably a binary file: " + subtitle_path)
|
|
continue
|
|
else:
|
|
logging.debug('BAZARR detected encoding %r', guess)
|
|
try:
|
|
text = text.decode(guess)
|
|
except Exception:
|
|
logging.debug(
|
|
"BAZARR skipping this subtitles because we can't decode the file using the "
|
|
"guessed encoding. It's probably a binary file: " + subtitle_path)
|
|
continue
|
|
detected_language = guess_language(text)
|
|
except Exception:
|
|
logging.debug('BAZARR was unable to detect encoding for this subtitles file: %r', subtitle_path)
|
|
finally:
|
|
if detected_language:
|
|
logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
|
|
detected_language))
|
|
try:
|
|
subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=forced,
|
|
hi=False)
|
|
except Exception:
|
|
pass
|
|
|
|
# If language is still None (undetected), skip it
|
|
if hasattr(subtitles[subtitle], 'basename') and not subtitles[subtitle].basename:
|
|
continue
|
|
|
|
# Skip HI detection if forced
|
|
if hasattr(language, 'forced') and language.forced:
|
|
continue
|
|
|
|
# Detect hearing-impaired external subtitles not identified in filename
|
|
if hasattr(subtitles[subtitle], 'hi') and not subtitles[subtitle].hi:
|
|
subtitle_path = os.path.join(dest_folder, subtitle)
|
|
|
|
# check if file exist:
|
|
if os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
|
|
# to improve performance, skip detection of files larger that 1M
|
|
if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
|
|
logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
|
|
subtitle_path)
|
|
continue
|
|
|
|
with open(subtitle_path, 'rb') as f:
|
|
text = f.read()
|
|
|
|
try:
|
|
text = text.decode('utf-8')
|
|
except UnicodeDecodeError:
|
|
detector = Detector()
|
|
try:
|
|
guess = detector.detect(text)
|
|
except Exception:
|
|
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
|
|
"It's probably a binary file: " + subtitle_path)
|
|
continue
|
|
else:
|
|
logging.debug('BAZARR detected encoding %r', guess)
|
|
try:
|
|
text = text.decode(guess)
|
|
except Exception:
|
|
logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
|
|
"guessed encoding. It's probably a binary file: " + subtitle_path)
|
|
continue
|
|
|
|
if bool(re.search(hi_regex, text)):
|
|
subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)
|
|
return subtitles
|