From 354454688d69e9bad009a87168b874bf19e9768c Mon Sep 17 00:00:00 2001 From: josdion Date: Wed, 25 Mar 2020 22:04:04 +0200 Subject: [PATCH] Fix subtitle character encoding detection Increase the size of the language recognition buffer to 32K --- bazarr/list_subtitles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 8a41f018f..9a5a28805 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -378,8 +378,8 @@ def guess_external_subtitles(dest_folder, subtitles): text = f.read() try: - # to improve performance, use only the first 8K to detect encoding - if len(text) > 8192: guess = chardet.detect(text[:8192]) + # to improve performance, use only the first 32K to detect encoding + if len(text) > 32768: guess = chardet.detect(text[:32768]) else: guess = chardet.detect(text) if guess["confidence"] < 0.8: raise UnicodeError