mirror of https://github.com/morpheus65535/bazarr
Adding simplified and traditional Chinese subtitles support (#1236)
This commit is contained in:
parent
06092c14a9
commit
1548263a6c
|
@ -20,10 +20,15 @@ class EmbeddedSubsReader:
|
|||
api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
|
||||
data = api.know(file)
|
||||
|
||||
traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
|
||||
|
||||
if 'subtitle' in data:
|
||||
for detected_language in data['subtitle']:
|
||||
if 'language' in detected_language:
|
||||
language = detected_language['language'].alpha3
|
||||
if language == 'zho' and 'name' in detected_language:
|
||||
if any (ext in (detected_language['name'].lower()) for ext in traditional_chinese):
|
||||
language = 'zht'
|
||||
forced = detected_language['forced'] if 'forced' in detected_language else False
|
||||
hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
|
||||
detected_language else False
|
||||
|
|
|
@ -19,6 +19,9 @@ def load_language_in_db():
|
|||
database.execute("INSERT OR IGNORE INTO table_settings_languages (code3, code2, name) "
|
||||
"VALUES ('pob', 'pb', 'Brazilian Portuguese')")
|
||||
|
||||
database.execute("INSERT OR IGNORE INTO table_settings_languages (code3, code2, name) "
|
||||
"VALUES ('zht', 'zt', 'Chinese Traditional')")
|
||||
|
||||
langs = [[lang.bibliographic, lang.alpha_3]
|
||||
for lang in pycountry.languages
|
||||
if hasattr(lang, 'alpha_2') and hasattr(lang, 'bibliographic')]
|
||||
|
@ -32,6 +35,9 @@ def load_language_in_db():
|
|||
|
||||
def create_languages_dict():
|
||||
global languages_dict
|
||||
# Rename the generic Chinese entry (code3 'zho') to 'Chinese Simplified'
|
||||
database.execute("UPDATE table_settings_languages SET name = 'Chinese Simplified' WHERE code3 = 'zho'")
|
||||
|
||||
languages_dict = database.execute("SELECT name, code2, code3, code3b FROM table_settings_languages")
|
||||
|
||||
|
||||
|
@ -69,6 +75,8 @@ def get_language_set():
|
|||
for lang in languages:
|
||||
if lang['code3'] == 'pob':
|
||||
language_set.add(Language('por', 'BR'))
|
||||
elif lang['code3'] == 'zht':
|
||||
language_set.add(Language('zho', 'TW'))
|
||||
else:
|
||||
language_set.add(Language(lang['code3']))
|
||||
|
||||
|
|
|
@ -135,6 +135,12 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
|
|||
lang_obj = Language.rebuild(lang_obj, forced=True)
|
||||
if hi == "force HI":
|
||||
lang_obj = Language.rebuild(lang_obj, hi=True)
|
||||
elif l == 'zht':
|
||||
lang_obj = Language('zho', 'TW')
|
||||
if forced == "True":
|
||||
lang_obj = Language.rebuild(lang_obj, forced=True)
|
||||
if hi == "force HI":
|
||||
lang_obj = Language.rebuild(lang_obj, hi=True)
|
||||
else:
|
||||
lang_obj = Language(l)
|
||||
if forced == "True":
|
||||
|
@ -214,6 +220,8 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
|
|||
downloaded_provider = subtitle.provider_name
|
||||
if subtitle.language == 'pt-BR':
|
||||
downloaded_language_code3 = 'pob'
|
||||
elif subtitle.language == 'zh-TW':
|
||||
downloaded_language_code3 = 'zht'
|
||||
else:
|
||||
downloaded_language_code3 = subtitle.language.alpha3
|
||||
downloaded_language = language_from_alpha3(downloaded_language_code3)
|
||||
|
@ -323,6 +331,8 @@ def manual_search(path, profileId, providers, providers_auth, sceneName, title,
|
|||
|
||||
if lang == 'pob':
|
||||
lang_obj = Language('por', 'BR')
|
||||
elif lang == 'zht':
|
||||
lang_obj = Language('zho', 'TW')
|
||||
else:
|
||||
lang_obj = Language(lang)
|
||||
|
||||
|
@ -530,6 +540,8 @@ def manual_download_subtitle(path, language, audio_language, hi, forced, subtitl
|
|||
downloaded_provider = saved_subtitle.provider_name
|
||||
if saved_subtitle.language == 'pt-BR':
|
||||
downloaded_language_code3 = 'pob'
|
||||
elif saved_subtitle.language == 'zh-TW':
|
||||
downloaded_language_code3 = 'zht'
|
||||
else:
|
||||
downloaded_language_code3 = subtitle.language.alpha3
|
||||
downloaded_language = language_from_alpha3(downloaded_language_code3)
|
||||
|
@ -631,6 +643,8 @@ def manual_upload_subtitle(path, language, forced, title, scene_name, media_type
|
|||
|
||||
if language == 'pob':
|
||||
lang_obj = Language('por', 'BR')
|
||||
elif language == 'zht':
|
||||
lang_obj = Language('zho', 'TW')
|
||||
else:
|
||||
lang_obj = Language(language)
|
||||
|
||||
|
|
|
@ -58,6 +58,10 @@ def store_subtitles(original_path, reversed_path):
|
|||
|
||||
brazilian_portuguese = [".pt-br", ".pob", "pb"]
|
||||
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
|
||||
simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"简", u"双语"]
|
||||
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", ".hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
|
||||
traditional_chinese = [".cht", ".tc", ".zht", ".hant", ".big5", u"繁", u"雙語"]
|
||||
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced",".hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced"]
|
||||
try:
|
||||
dest_folder = get_subtitle_destination_folder()
|
||||
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
|
||||
|
@ -84,6 +88,22 @@ def store_subtitles(original_path, reversed_path):
|
|||
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
|
||||
actual_subtitles.append(
|
||||
[str("pb:forced"), path_mappings.path_replace_reverse(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zh")
|
||||
actual_subtitles.append(
|
||||
[str("zh"), path_mappings.path_replace_reverse(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
|
||||
actual_subtitles.append(
|
||||
[str("zh:forced"), path_mappings.path_replace_reverse(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zt")
|
||||
actual_subtitles.append(
|
||||
[str("zt"), path_mappings.path_replace_reverse(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
|
||||
actual_subtitles.append(
|
||||
[str("zt:forced"), path_mappings.path_replace_reverse(subtitle_path)])
|
||||
elif not language:
|
||||
continue
|
||||
elif str(language) != 'und':
|
||||
|
@ -149,6 +169,10 @@ def store_subtitles_movie(original_path, reversed_path):
|
|||
|
||||
brazilian_portuguese = [".pt-br", ".pob", "pb"]
|
||||
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
|
||||
simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"简", u"双语"]
|
||||
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", ".hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
|
||||
traditional_chinese = [".cht", ".tc", ".zht", ".hant", ".big5", u"繁", u"雙語", "zh-tw"]
|
||||
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced",".hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"]
|
||||
try:
|
||||
dest_folder = get_subtitle_destination_folder() or ''
|
||||
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
|
||||
|
@ -172,6 +196,18 @@ def store_subtitles_movie(original_path, reversed_path):
|
|||
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)):
|
||||
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
|
||||
actual_subtitles.append([str("pb:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zh")
|
||||
actual_subtitles.append([str("zh"), path_mappings.path_replace_reverse_movie(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
|
||||
actual_subtitles.append([str("zh:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zt")
|
||||
actual_subtitles.append([str("zt"), path_mappings.path_replace_reverse_movie(subtitle_path)])
|
||||
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
|
||||
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
|
||||
actual_subtitles.append([str("zt:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
|
||||
elif not language:
|
||||
continue
|
||||
elif str(language.basename) != 'und':
|
||||
|
@ -492,6 +528,13 @@ def guess_external_subtitles(dest_folder, subtitles):
|
|||
try:
|
||||
text = text.decode('utf-8')
|
||||
detected_language = guess_language(text)
|
||||
# Distinguish simplified from traditional Chinese when the guess is plain 'zh':
# keep 'zh' if the subtitle path carries a simplified-Chinese marker,
# otherwise label the subtitle as traditional ('zt').
# NOTE: these must be assignments; a bare `==` comparison here is a no-op and
# leaves every Chinese subtitle labelled 'zh'.
if detected_language == 'zh':
    simplified_chinese = [".chs", ".sc", ".zhs", ".hans", ".gb", u"简", u"双语"]
    if any(ext in str(subtitle_path) for ext in simplified_chinese):
        detected_language = 'zh'
    else:
        detected_language = 'zt'
|
||||
except UnicodeDecodeError:
|
||||
detector = Detector()
|
||||
try:
|
||||
|
|
|
@ -326,6 +326,8 @@ def subtitles_apply_mods(language, subtitle_path, mods):
|
|||
|
||||
if language == 'pob':
|
||||
lang_obj = Language('por', 'BR')
|
||||
elif language == 'zht':
|
||||
lang_obj = Language('zho', 'TW')
|
||||
else:
|
||||
lang_obj = Language(language)
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@ from subliminal.exceptions import ConfigurationError
|
|||
|
||||
class AssrtConverter(LanguageReverseConverter):
|
||||
def __init__(self):
|
||||
self.from_assrt = { u'简体': ('zho', None, 'Hans'), u'繁体': ('zho', None, 'Hant'),
|
||||
u'簡體': ('zho', None, 'Hans'), u'繁體': ('zho', None, 'Hant'),
|
||||
self.from_assrt = { u'简体': ('zho', 'CN', None), u'繁体': ('zho', 'TW', None),
|
||||
u'簡體': ('zho', 'CN', None), u'繁體': ('zho', 'TW', None),
|
||||
u'英文': ('eng',),
|
||||
u'chs': ('zho', None, 'Hans'), u'cht': ('zho', None, 'Hant'),
|
||||
u'chn': ('zho', None, 'Hans'), u'twn': ('zho', None, 'Hant')}
|
||||
self.to_assrt = { ('zho', None, 'Hans'): u'chs', ('zho', None, 'Hant'): u'cht',
|
||||
u'chs': ('zho', 'CN', None), u'cht': ('zho', 'TW', None),
|
||||
u'chn': ('zho', 'CN', None), u'twn': ('zho', 'TW', None)}
|
||||
self.to_assrt = { ('zho', 'CN', None): u'chs', ('zho', 'TW', None): u'cht',
|
||||
('eng', None, None) : u'eng', ('zho', None, None): u'chs'}
|
||||
self.codes = set(self.from_assrt.keys())
|
||||
|
||||
|
|
|
@ -636,6 +636,13 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
|||
hi_tag = ["hi", "cc", "sdh"]
|
||||
hi = any(i for i in hi_tag if i in adv_tag)
|
||||
|
||||
# Register simplified/traditional Chinese tags so they are treated as language codes below
|
||||
simplified_chinese = ["chs", "sc", "zhs", "hans", "gb", u"简", u"双语"]
|
||||
traditional_chinese = ["cht", "tc", "zht", "hant", "big5", u"繁", u"雙語"]
|
||||
FULL_LANGUAGE_LIST.extend(simplified_chinese)
|
||||
FULL_LANGUAGE_LIST.extend(traditional_chinese)
|
||||
p_root = p_root.replace('zh-TW', 'zht')
|
||||
|
||||
# remove possible language code for matching
|
||||
p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(
|
||||
lambda m: "" if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0), p_root)
|
||||
|
@ -655,14 +662,24 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
|
|||
try:
|
||||
language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
|
||||
try:
|
||||
language = Language.fromietf(language_code)
|
||||
language = Language.fromietf(language_code)
|
||||
language.forced = forced
|
||||
language.hi = hi
|
||||
except (ValueError, LanguageReverseError):
|
||||
logger.error('Cannot parse language code %r', language_code)
|
||||
language_code = None
|
||||
#add simplified/traditional chinese detection
|
||||
if any(ext in str(language_code) for ext in simplified_chinese):
|
||||
language = Language.fromietf('zh')
|
||||
language.forced = forced
|
||||
language.hi = hi
|
||||
elif any(ext in str(language_code) for ext in traditional_chinese):
|
||||
language = Language.fromietf('zh')
|
||||
language.forced = forced
|
||||
language.hi = hi
|
||||
else:
|
||||
logger.error('Cannot parse language code %r', language_code)
|
||||
language_code = None
|
||||
except IndexError:
|
||||
language_code = None
|
||||
language_code = None
|
||||
|
||||
if not language and not language_code and only_one:
|
||||
language = Language.rebuild(list(languages)[0], forced=forced, hi=hi)
|
||||
|
|
|
@ -75,15 +75,15 @@ def test_get_matches_movie_name(movies):
|
|||
|
||||
@pytest.mark.converter
|
||||
def test_converter_convert_alpha3():
|
||||
assert language_converters['assrt'].convert('zho', None, 'Hans') == 'chs'
|
||||
assert language_converters['assrt'].convert('zho', None, 'Hant') == 'cht'
|
||||
assert language_converters['assrt'].convert('zho', None, 'Hans') == 'chi'
|
||||
assert language_converters['assrt'].convert('zho', None, 'Hant') == 'zht'
|
||||
assert language_converters['assrt'].convert('eng') == 'eng'
|
||||
|
||||
|
||||
@pytest.mark.converter
|
||||
def test_converter_reverse():
|
||||
assert language_converters['assrt'].reverse('chs') == ('zho', None, 'Hans')
|
||||
assert language_converters['assrt'].reverse('cht') == ('zho', None, 'Hant')
|
||||
assert language_converters['assrt'].reverse('chi') == ('zho', None, 'Hans')
|
||||
assert language_converters['assrt'].reverse('zht') == ('zho', None, 'Hant')
|
||||
assert language_converters['assrt'].reverse(u'簡體') == ('zho', None, 'Hans')
|
||||
assert language_converters['assrt'].reverse(u'繁體') == ('zho', None, 'Hant')
|
||||
assert language_converters['assrt'].reverse(u'简体') == ('zho', None, 'Hans')
|
||||
|
|
Loading…
Reference in New Issue