Adding simplified and traditional Chinese subtitles support (#1236)

This commit is contained in:
Xavier Xiong 2021-02-12 20:15:19 +01:00 committed by GitHub
parent 06092c14a9
commit 1548263a6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 102 additions and 13 deletions

View File

@ -20,10 +20,15 @@ class EmbeddedSubsReader:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
data = api.know(file)
traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"", u"雙語"]
if 'subtitle' in data:
for detected_language in data['subtitle']:
if 'language' in detected_language:
language = detected_language['language'].alpha3
if language == 'zho' and 'name' in detected_language:
if any (ext in (detected_language['name'].lower()) for ext in traditional_chinese):
language = 'zht'
forced = detected_language['forced'] if 'forced' in detected_language else False
hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
detected_language else False

View File

@ -19,6 +19,9 @@ def load_language_in_db():
database.execute("INSERT OR IGNORE INTO table_settings_languages (code3, code2, name) "
"VALUES ('pob', 'pb', 'Brazilian Portuguese')")
database.execute("INSERT OR IGNORE INTO table_settings_languages (code3, code2, name) "
"VALUES ('zht', 'zt', 'Chinese Traditional')")
langs = [[lang.bibliographic, lang.alpha_3]
for lang in pycountry.languages
if hasattr(lang, 'alpha_2') and hasattr(lang, 'bibliographic')]
@ -32,6 +35,9 @@ def load_language_in_db():
def create_languages_dict():
global languages_dict
# Rename the generic "Chinese" entry (code3 'zho') to "Chinese Simplified".
database.execute("UPDATE table_settings_languages SET name = 'Chinese Simplified' WHERE code3 = 'zho'")
languages_dict = database.execute("SELECT name, code2, code3, code3b FROM table_settings_languages")
@ -69,6 +75,8 @@ def get_language_set():
for lang in languages:
if lang['code3'] == 'pob':
language_set.add(Language('por', 'BR'))
elif lang['code3'] == 'zht':
language_set.add(Language('zho', 'TW'))
else:
language_set.add(Language(lang['code3']))

View File

@ -135,6 +135,12 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
elif l == 'zht':
lang_obj = Language('zho', 'TW')
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
else:
lang_obj = Language(l)
if forced == "True":
@ -214,6 +220,8 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
downloaded_provider = subtitle.provider_name
if subtitle.language == 'pt-BR':
downloaded_language_code3 = 'pob'
elif subtitle.language == 'zh-TW':
downloaded_language_code3 = 'zht'
else:
downloaded_language_code3 = subtitle.language.alpha3
downloaded_language = language_from_alpha3(downloaded_language_code3)
@ -323,6 +331,8 @@ def manual_search(path, profileId, providers, providers_auth, sceneName, title,
if lang == 'pob':
lang_obj = Language('por', 'BR')
elif lang == 'zht':
lang_obj = Language('zho', 'TW')
else:
lang_obj = Language(lang)
@ -530,6 +540,8 @@ def manual_download_subtitle(path, language, audio_language, hi, forced, subtitl
downloaded_provider = saved_subtitle.provider_name
if saved_subtitle.language == 'pt-BR':
downloaded_language_code3 = 'pob'
elif saved_subtitle.language == 'zh-TW':
downloaded_language_code3 = 'zht'
else:
downloaded_language_code3 = subtitle.language.alpha3
downloaded_language = language_from_alpha3(downloaded_language_code3)
@ -631,6 +643,8 @@ def manual_upload_subtitle(path, language, forced, title, scene_name, media_type
if language == 'pob':
lang_obj = Language('por', 'BR')
elif language == 'zht':
lang_obj = Language('zho', 'TW')
else:
lang_obj = Language(language)

View File

@ -58,6 +58,10 @@ def store_subtitles(original_path, reversed_path):
brazilian_portuguese = [".pt-br", ".pob", "pb"]
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"", u"双语"]
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", ".hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
traditional_chinese = [".cht", ".tc", ".zht", ".hant", ".big5", u"", u"雙語"]
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced",".hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced"]
try:
dest_folder = get_subtitle_destination_folder()
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
@ -84,6 +88,22 @@ def store_subtitles(original_path, reversed_path):
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
actual_subtitles.append(
[str("pb:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese):
logging.debug("BAZARR external subtitles detected: " + "zh")
actual_subtitles.append(
[str("zh"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
actual_subtitles.append(
[str("zh:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese):
logging.debug("BAZARR external subtitles detected: " + "zt")
actual_subtitles.append(
[str("zt"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
actual_subtitles.append(
[str("zt:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif not language:
continue
elif str(language) != 'und':
@ -149,6 +169,10 @@ def store_subtitles_movie(original_path, reversed_path):
brazilian_portuguese = [".pt-br", ".pob", "pb"]
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"", u"双语"]
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", ".hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
traditional_chinese = [".cht", ".tc", ".zht", ".hant", ".big5", u"", u"雙語", "zh-tw"]
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced",".hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"]
try:
dest_folder = get_subtitle_destination_folder() or ''
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
@ -172,6 +196,18 @@ def store_subtitles_movie(original_path, reversed_path):
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)):
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
actual_subtitles.append([str("pb:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese):
logging.debug("BAZARR external subtitles detected: " + "zh")
actual_subtitles.append([str("zh"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
actual_subtitles.append([str("zh:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese):
logging.debug("BAZARR external subtitles detected: " + "zt")
actual_subtitles.append([str("zt"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
actual_subtitles.append([str("zt:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif not language:
continue
elif str(language.basename) != 'und':
@ -492,6 +528,13 @@ def guess_external_subtitles(dest_folder, subtitles):
try:
text = text.decode('utf-8')
detected_language = guess_language(text)
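# Distinguish Simplified from Traditional Chinese using markers in the subtitle path.
# NOTE(review): both branches below use '==' (a comparison whose result is discarded),
# not '=', so detected_language is never changed here - confirm assignment was intended.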
if detected_language == 'zh':
simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"", u"双语"]
if any(ext in str(subtitle_path) for ext in simplified_chinese):
detected_language == 'zh'
else:
detected_language == 'zt'
except UnicodeDecodeError:
detector = Detector()
try:

View File

@ -326,6 +326,8 @@ def subtitles_apply_mods(language, subtitle_path, mods):
if language == 'pob':
lang_obj = Language('por', 'BR')
elif language == 'zht':
lang_obj = Language('zho', 'TW')
else:
lang_obj = Language(language)

View File

@ -5,12 +5,12 @@ from subliminal.exceptions import ConfigurationError
class AssrtConverter(LanguageReverseConverter):
def __init__(self):
self.from_assrt = { u'简体': ('zho', None, 'Hans'), u'繁体': ('zho', None, 'Hant'),
u'簡體': ('zho', None, 'Hans'), u'繁體': ('zho', None, 'Hant'),
self.from_assrt = { u'简体': ('zho', 'CN', None), u'繁体': ('zho', 'TW', None),
u'簡體': ('zho', 'CN', None), u'繁體': ('zho', 'TW', None),
u'英文': ('eng',),
u'chs': ('zho', None, 'Hans'), u'cht': ('zho', None, 'Hant'),
u'chn': ('zho', None, 'Hans'), u'twn': ('zho', None, 'Hant')}
self.to_assrt = { ('zho', None, 'Hans'): u'chs', ('zho', None, 'Hant'): u'cht',
u'chs': ('zho', 'CN', None), u'cht': ('zho', 'TW', None),
u'chn': ('zho', 'CN', None), u'twn': ('zho', 'TW', None)}
self.to_assrt = { ('zho', 'CN', None): u'chs', ('zho', 'TW', None): u'cht',
('eng', None, None) : u'eng', ('zho', None, None): u'chs'}
self.codes = set(self.from_assrt.keys())

View File

@ -636,6 +636,13 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
hi_tag = ["hi", "cc", "sdh"]
hi = any(i for i in hi_tag if i in adv_tag)
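# Register Simplified/Traditional Chinese filename markers as known language codes.
# NOTE(review): FULL_LANGUAGE_LIST is extended in place; if it is a shared list and this
# code runs more than once, the same entries are appended repeatedly - confirm.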
simplified_chinese = ["chs", "sc", "zhs", "hans", "gb", u"", u"双语"]
traditional_chinese = ["cht", "tc", "zht", "hant", "big5", u"", u"雙語"]
FULL_LANGUAGE_LIST.extend(simplified_chinese)
FULL_LANGUAGE_LIST.extend(traditional_chinese)
p_root = p_root.replace('zh-TW', 'zht')
# remove possible language code for matching
p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(
lambda m: "" if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0), p_root)
@ -655,14 +662,24 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
try:
language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
try:
language = Language.fromietf(language_code)
language = Language.fromietf(language_code)
language.forced = forced
language.hi = hi
except (ValueError, LanguageReverseError):
logger.error('Cannot parse language code %r', language_code)
language_code = None
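# The code is not a valid IETF tag: fall back to Simplified/Traditional Chinese markers.
# NOTE(review): both branches below build Language.fromietf('zh'), so a Traditional
# marker yields the same Language as a Simplified one - confirm this is intended.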
if any(ext in str(language_code) for ext in simplified_chinese):
language = Language.fromietf('zh')
language.forced = forced
language.hi = hi
elif any(ext in str(language_code) for ext in traditional_chinese):
language = Language.fromietf('zh')
language.forced = forced
language.hi = hi
else:
logger.error('Cannot parse language code %r', language_code)
language_code = None
except IndexError:
language_code = None
language_code = None
if not language and not language_code and only_one:
language = Language.rebuild(list(languages)[0], forced=forced, hi=hi)

View File

@ -75,15 +75,15 @@ def test_get_matches_movie_name(movies):
@pytest.mark.converter
def test_converter_convert_alpha3():
assert language_converters['assrt'].convert('zho', None, 'Hans') == 'chs'
assert language_converters['assrt'].convert('zho', None, 'Hant') == 'cht'
assert language_converters['assrt'].convert('zho', None, 'Hans') == 'chi'
assert language_converters['assrt'].convert('zho', None, 'Hant') == 'zht'
assert language_converters['assrt'].convert('eng') == 'eng'
@pytest.mark.converter
def test_converter_reverse():
assert language_converters['assrt'].reverse('chs') == ('zho', None, 'Hans')
assert language_converters['assrt'].reverse('cht') == ('zho', None, 'Hant')
assert language_converters['assrt'].reverse('chi') == ('zho', None, 'Hans')
assert language_converters['assrt'].reverse('zht') == ('zho', None, 'Hant')
assert language_converters['assrt'].reverse(u'簡體') == ('zho', None, 'Hans')
assert language_converters['assrt'].reverse(u'繁體') == ('zho', None, 'Hant')
assert language_converters['assrt'].reverse(u'简体') == ('zho', None, 'Hans')