Read the whole text file when guessing the actual language of an external subtitle file whose filename doesn't include a language code.

This commit is contained in:
Louis Vézina 2019-11-17 19:32:41 -05:00
parent e0aac7de4a
commit 8008c5f019
1 changed file with 5 additions and 8 deletions

View File

@ -13,7 +13,6 @@ from subliminal import core
from subliminal_patch import search_external_subtitles
from subzero.language import Language
from bs4 import UnicodeDammit
from itertools import islice
from get_args import args
from database import database
@ -88,10 +87,9 @@ def store_subtitles(file):
if os.path.splitext(subtitle)[1] != ".sub":
logging.debug("BAZARR falling back to file content analysis to detect language.")
with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f:
text = list(islice(f, 100))
text = ' '.join(text)
encoding = UnicodeDammit(text)
text = f.read()
try:
encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding)
detected_language = langdetect.detect(text)
except Exception as e:
@ -186,10 +184,9 @@ def store_subtitles_movie(file):
if os.path.splitext(subtitle)[1] != ".sub":
logging.debug("BAZARR falling back to file content analysis to detect language.")
with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f:
text = list(islice(f, 100))
text = ' '.join(text)
encoding = UnicodeDammit(text)
text = f.read()
try:
encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding)
detected_language = langdetect.detect(text)
except Exception as e:
@ -408,7 +405,7 @@ def guess_external_subtitles(dest_folder, subtitles):
logging.debug("BAZARR falling back to file content analysis to detect language.")
detected_language = None
with open(subtitle_path, 'r') as f:
text = ' '.join(list(islice(f, 100)))
text = f.read()
try:
encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding)