Fix subtitle character encoding detection

Increase the size of the language recognition buffer used for encoding detection from 8K to 32K
This commit is contained in:
josdion 2020-03-25 22:04:04 +02:00 committed by GitHub
parent c982e37024
commit 354454688d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 2 deletions

View File

@@ -378,8 +378,8 @@ def guess_external_subtitles(dest_folder, subtitles):
text = f.read()
try:
# to improve performance, use only the first 8K to detect encoding
if len(text) > 8192: guess = chardet.detect(text[:8192])
# to improve performance, use only the first 32K to detect encoding
if len(text) > 32768: guess = chardet.detect(text[:32768])
else: guess = chardet.detect(text)
if guess["confidence"] < 0.8:
raise UnicodeError