From 4ebcd49546ed7772cb6f3a9c83079e5aea08e15a Mon Sep 17 00:00:00 2001 From: Vitiko <59455966+vitiko98@users.noreply.github.com> Date: Sun, 6 Jun 2021 09:57:29 -0400 Subject: [PATCH] Added custom language class to make it easier to implement non-standard/regional languages --- bazarr/custom_lang.py | 204 ++++++++++++++++++ bazarr/embedded_subs_reader.py | 76 +++---- bazarr/get_languages.py | 33 +-- bazarr/get_subtitle.py | 76 +++---- bazarr/list_subtitles.py | 86 ++------ bazarr/utils.py | 10 +- frontend/src/Settings/Providers/list.ts | 8 +- libs/subliminal_patch/core.py | 4 +- libs/subliminal_patch/language.py | 6 +- libs/subliminal_patch/providers/argenteam.py | 63 +++--- libs/subliminal_patch/providers/subdivx.py | 22 +- libs/subliminal_patch/providers/sucha.py | 78 +++---- .../subliminal_patch/providers/tusubtitulo.py | 18 +- libs/subzero/language.py | 5 + 14 files changed, 408 insertions(+), 281 deletions(-) create mode 100644 bazarr/custom_lang.py diff --git a/bazarr/custom_lang.py b/bazarr/custom_lang.py new file mode 100644 index 000000000..091ca36e4 --- /dev/null +++ b/bazarr/custom_lang.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- + +import logging +import os + +from subzero.language import Language + +logger = logging.getLogger(__name__) + + +class CustomLanguage: + """Base class for custom languages.""" + + alpha2 = "pb" + alpha3 = "pob" + language = "pt-BR" + official_alpha2 = "pt" + official_alpha3 = "por" + name = "Brazilian Portuguese" + iso = "BR" + _possible_matches = ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil") + _extensions = (".pt-br", ".pob", ".pb") + _extensions_forced = (".pt-br.forced", ".pob.forced", ".pb.forced") + + def subzero_language(self): + return Language(self.official_alpha3, self.iso) + + @classmethod + def from_value(cls, value, attr="alpha3"): + """Return a custom language subclass by value and attribute + if found, otherwise return None. + + :param value: + :param attr: + """ + for sub in cls.__subclasses__(): + if getattr(sub, attr) == str(value): + return sub() + + return None + + @classmethod + def register(cls, table): + "Register the custom language subclasses in the database." + + for sub in cls.__subclasses__(): + table.insert( + {table.code3: sub.alpha3, table.code2: sub.alpha2, table.name: sub.name} + ).on_conflict(action="IGNORE").execute() + + @classmethod + def found_external(cls, subtitle, subtitle_path): + for sub in cls.__subclasses__(): + code = sub.get_alpha_type(subtitle, subtitle_path) + if code: + return code + + return None + + @classmethod + def get_alpha_type(cls, subtitle: str, subtitle_path=None): + assert subtitle_path is not None + + extension = str(os.path.splitext(subtitle)[0]).lower() + to_return = None + + if extension.endswith(cls._extensions): + to_return = cls.alpha2 + + if extension.endswith(cls._extensions_forced): + to_return = f"{cls.alpha2}:forced" + + if to_return is not None: + logging.debug("BAZARR external subtitles detected: %s", to_return) + + return to_return + + def ffprobe_found(self, detected_language: dict) -> bool: + name = detected_language.get("name", "").lower() + if not name: + return False + + return any(ext in name for ext in self._possible_matches) + + +class BrazilianPortuguese(CustomLanguage): + # Same attributes as base class + pass + + +class ChineseTraditional(CustomLanguage): + alpha2 = "zt" + alpha3 = "zht" + language = "zh-TW" + official_alpha2 = "zh" + official_alpha3 = "zho" + name = "Chinese Traditional" + iso = "TW" + _extensions = ( + ".cht", + ".tc", + ".zh-tw", + ".zht", + ".zh-hant", + ".zhhant", + ".zh_hant", + ".hant", + ".big5", + ".traditional", + ) + _extensions_forced = ( + ".cht.forced", + ".tc.forced", + ".zht.forced", + "hant.forced", + ".big5.forced", + "繁體中文.forced", + "雙語.forced", + ".zh-tw.forced", + ) + _extensions_fuzzy = ("繁", "雙語") + _extensions_disamb_fuzzy = ("简", "双语") + _extensions_disamb = ( + ".chs", + ".sc", + ".zhs", + ".zh-hans", + ".hans", + ".zh_hans", + ".zhhans", + ".gb", + ".simplified", + ) + _extensions_disamb_forced = ( + ".chs.forced", + ".sc.forced", + ".zhs.forced", + "hans.forced", + ".gb.forced", + "简体中文.forced", + "双语.forced", + ) + + @classmethod + def get_alpha_type(cls, subtitle, subtitle_path=None): + subtitle_path = str(subtitle_path).lower() + extension = str(os.path.splitext(subtitle)[0]).lower() + + to_return = None + + # Simplified chinese + if ( + extension.endswith(cls._extensions_disamb) + or subtitle_path in cls._extensions_disamb_fuzzy + ): + to_return = "zh" + + elif any(ext in extension[-12:] for ext in cls._extensions_disamb_forced): + to_return = "zh:forced" + + # Traditional chinese + elif ( + extension.endswith(cls._extensions) + or subtitle_path[:-5] in cls._extensions_fuzzy + ): + to_return = "zt" + + elif any(ext in extension[-12:] for ext in cls._extensions_forced): + to_return = "zt:forced" + + if to_return is not None: + logging.debug("BAZARR external subtitles detected: %s", to_return) + + return to_return + + +class LatinAmericanSpanish(CustomLanguage): + alpha2 = "ea" # Only one available I can think of + alpha3 = "spl" + language = "es-LA" + official_alpha2 = "es" + official_alpha3 = "spa" + name = "Latin American Spanish" + iso = "MX" # Not fair, but ok + _possible_matches = ( + "es-la", + "spa-la", + "spl", + "mx", + "latin", + "mexic", + "argent", + "latam", + ) + _extensions = (".es-la", ".spl", ".spa-la", ".ea", ".es-mx", ".lat", ".es.ar") + _extensions_forced = ( + ".es-la.forced", + ".spl.forced", + ".spa-la.forced", + ".ea.forced", + ".es-mx.forced", + ".lat.forced", + ".es.ar.forced", + ) diff --git a/bazarr/embedded_subs_reader.py b/bazarr/embedded_subs_reader.py index 853719176..ad871b0a2 100644 --- a/bazarr/embedded_subs_reader.py +++ b/bazarr/embedded_subs_reader.py @@ -7,39 +7,29 @@ from knowit import api import enzyme from enzyme.exceptions import MalformedMKVError from enzyme.exceptions import MalformedMKVError +from custom_lang import CustomLanguage from database import TableEpisodes, TableMovies -_FFPROBE_SPECIAL_LANGS = { - "zho": { - "list": ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"], - "alpha3": "zht", - }, - "por": { - "list": ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"], - "alpha3": "pob", - }, -} +logger = logging.getLogger(__name__) + def _handle_alpha3(detected_language: dict): alpha3 = detected_language["language"].alpha3 + custom = CustomLanguage.from_value(alpha3, "official_alpha3") - name = detected_language.get("name", "").lower() - special_lang = _FFPROBE_SPECIAL_LANGS.get(alpha3) + if custom and custom.ffprobe_found(detected_language): + logger.debug("Custom embedded language found: %s", custom.name) + return custom.alpha3 - if special_lang is None or not name: - return alpha3 # The original alpha3 + return alpha3 - if any(ext in name for ext in special_lang["list"]): - return special_lang["alpha3"] # Guessed alpha from _FFPROBE_OTHER_LANGS - - return alpha3 # In any case def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None): data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id) subtitles_list = [] - if data['ffprobe'] and 'subtitle' in data['ffprobe']: - for detected_language in data['ffprobe']['subtitle']: + if data["ffprobe"] and "subtitle" in data["ffprobe"]: + for detected_language in data["ffprobe"]["subtitle"]: if not "language" in detected_language: continue @@ -53,15 +43,23 @@ def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=No forced = detected_language.get("forced", False) hearing_impaired = detected_language.get("hearing_impaired", False) - codec = detected_language.get("format") # or None + codec = detected_language.get("format") # or None subtitles_list.append([language, forced, hearing_impaired, codec]) - elif data['enzyme']: - for subtitle_track in data['enzyme'].subtitle_tracks: - hearing_impaired = subtitle_track.name and "sdh" in subtitle_track.name.lower() + elif data["enzyme"]: + for subtitle_track in data["enzyme"].subtitle_tracks: + hearing_impaired = ( + subtitle_track.name and "sdh" in subtitle_track.name.lower() + ) - subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired, - subtitle_track.codec_id]) + subtitles_list.append( + [ + subtitle_track.language, + subtitle_track.forced, + hearing_impaired, + subtitle_track.codec_id, + ] + ) return subtitles_list @@ -69,10 +67,10 @@ def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=No def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None): # Define default data keys value data = { - 'ffprobe': {}, - 'enzyme': {}, - 'file_id': episode_file_id or movie_file_id, - 'file_size': file_size + "ffprobe": {}, + "enzyme": {}, + "file_id": episode_file_id or movie_file_id, + "file_size": file_size, } # Get the actual cache value form database @@ -104,24 +102,26 @@ def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=No # if not, we retrieve the metadata from the file from utils import get_binary + ffprobe_path = get_binary("ffprobe") # if we have ffprobe available if ffprobe_path: - api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path}) - data['ffprobe'] = api.know(file) + api.initialize({"provider": "ffmpeg", "ffmpeg": ffprobe_path}) + data["ffprobe"] = api.know(file) # if nto, we use enzyme for mkv files else: - if os.path.splitext(file)[1] == '.mkv': - with open(file, 'rb') as f: + if os.path.splitext(file)[1] == ".mkv": + with open(file, "rb") as f: try: mkv = enzyme.MKV(f) except MalformedMKVError: - logging.error( - 'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ' - 'ffmpeg/ffprobe: ' + file) + logger.error( + "BAZARR cannot analyze this MKV with our built-in MKV parser, you should install " + "ffmpeg/ffprobe: " + file + ) else: - data['enzyme'] = mkv + data["enzyme"] = mkv # we write to db the result and return the newly cached ffprobe dict if episode_file_id: diff --git a/bazarr/get_languages.py b/bazarr/get_languages.py index 70c7a7fad..7aefae41c 100644 --- a/bazarr/get_languages.py +++ b/bazarr/get_languages.py @@ -3,7 +3,8 @@ import pycountry from subzero.language import Language -from database import database, TableSettingsLanguages +from custom_lang import CustomLanguage +from database import TableSettingsLanguages def load_language_in_db(): @@ -13,22 +14,7 @@ def load_language_in_db(): if hasattr(lang, 'alpha_2')] # Insert languages in database table - TableSettingsLanguages.insert_many(langs, - fields=[TableSettingsLanguages.code3, TableSettingsLanguages.code2, - TableSettingsLanguages.name]) \ - .on_conflict(action='IGNORE') \ - .execute() - - TableSettingsLanguages.insert({TableSettingsLanguages.code3: 'pob', TableSettingsLanguages.code2: 'pb', - TableSettingsLanguages.name: 'Brazilian Portuguese'}) \ - .on_conflict(action='IGNORE') \ - .execute() - - # insert chinese languages - TableSettingsLanguages.insert({TableSettingsLanguages.code3: 'zht', TableSettingsLanguages.code2: 'zt', - TableSettingsLanguages.name: 'Chinese Traditional'}) \ - .on_conflict(action='IGNORE')\ - .execute() + CustomLanguage.register(TableSettingsLanguages) langs = [[lang.bibliographic, lang.alpha_3] for lang in pycountry.languages @@ -88,15 +74,14 @@ def get_language_set(): .where(TableSettingsLanguages.enabled == 1).dicts() language_set = set() - + for lang in languages: - if lang['code3'] == 'pob': - language_set.add(Language('por', 'BR')) - elif lang['code3'] == 'zht': - language_set.add(Language('zho', 'TW')) + custom = CustomLanguage.from_value(lang["code3"], "alpha3") + if custom is None: + language_set.add(Language(lang["code3"])) else: - language_set.add(Language(lang['code3'])) - + language_set.add(custom.subzero_language()) + return language_set diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py index 50a6a4ee7..70d76dc50 100644 --- a/bazarr/get_subtitle.py +++ b/bazarr/get_subtitle.py @@ -34,6 +34,7 @@ from get_providers import get_providers, get_providers_auth, provider_throttle, from knowit import api from subsyncer import subsync from guessit import guessit +from custom_lang import CustomLanguage from database import dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \ get_desired_languages, TableShows, TableEpisodes, TableMovies, TableHistory, TableHistoryMovie from event_handler import event_stream, show_progress, hide_progress @@ -139,24 +140,13 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro # Always use alpha2 in API Request l = alpha3_from_alpha2(l) - if l == 'pob': - lang_obj = Language('por', 'BR') - if forced == "True": - lang_obj = Language.rebuild(lang_obj, forced=True) - if hi == "force HI": - lang_obj = Language.rebuild(lang_obj, hi=True) - elif l == 'zht': - lang_obj = Language('zho', 'TW') - if forced == "True": - lang_obj = Language.rebuild(lang_obj, forced=True) - if hi == "force HI": - lang_obj = Language.rebuild(lang_obj, hi=True) - else: - lang_obj = Language(l) - if forced == "True": - lang_obj = Language.rebuild(lang_obj, forced=True) - if hi == "force HI": - lang_obj = Language.rebuild(lang_obj, hi=True) + lang_obj = _get_lang_obj(l) + + if forced == "True": + lang_obj = Language.rebuild(lang_obj, forced=True) + if hi == "force HI": + lang_obj = Language.rebuild(lang_obj, hi=True) + language_set.add(lang_obj) minimum_score = settings.general.minimum_score @@ -165,6 +155,7 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro postprocessing_cmd = settings.general.postprocessing_cmd single = settings.general.getboolean('single_language') + # todo: """ AsyncProviderPool: @@ -228,12 +219,8 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro saved_any = True for subtitle in saved_subtitles: downloaded_provider = subtitle.provider_name - if subtitle.language == 'pt-BR': - downloaded_language_code3 = 'pob' - elif subtitle.language == 'zh-TW': - downloaded_language_code3 = 'zht' - else: - downloaded_language_code3 = subtitle.language.alpha3 + downloaded_language_code3 = _get_download_code3(subtitle) + downloaded_language = language_from_alpha3(downloaded_language_code3) downloaded_language_code2 = alpha2_from_alpha3(downloaded_language_code3) audio_language_code2 = alpha2_from_language(audio_language) @@ -346,12 +333,7 @@ def manual_search(path, profileId, providers, providers_auth, sceneName, title, lang = alpha3_from_alpha2(language) - if lang == 'pob': - lang_obj = Language('por', 'BR') - elif lang == 'zht': - lang_obj = Language('zho', 'TW') - else: - lang_obj = Language(lang) + lang_obj = _get_lang_obj(lang) if forced == "True": lang_obj = Language.rebuild(lang_obj, forced=True) @@ -562,12 +544,8 @@ def manual_download_subtitle(path, language, audio_language, hi, forced, subtitl if saved_subtitles: for saved_subtitle in saved_subtitles: downloaded_provider = saved_subtitle.provider_name - if saved_subtitle.language == 'pt-BR': - downloaded_language_code3 = 'pob' - elif saved_subtitle.language == 'zh-TW': - downloaded_language_code3 = 'zht' - else: - downloaded_language_code3 = subtitle.language.alpha3 + downloaded_language_code3 = _get_download_code3(subtitle) + downloaded_language = language_from_alpha3(downloaded_language_code3) downloaded_language_code2 = alpha2_from_alpha3(downloaded_language_code3) audio_language_code2 = alpha2_from_language(audio_language) @@ -666,13 +644,12 @@ def manual_upload_subtitle(path, language, forced, title, scene_name, media_type 'win') and settings.general.getboolean('chmod_enabled') else None language = alpha3_from_alpha2(language) - - if language == 'pob': - lang_obj = Language('por', 'BR') - elif language == 'zht': - lang_obj = Language('zho', 'TW') - else: + + custom = Language.from_value(language) + if custom is None: lang_obj = Language(language) + else: + lang_obj = custom.subzero_language() if forced: lang_obj = Language.rebuild(lang_obj, forced=True) @@ -1685,3 +1662,18 @@ def sync_subtitles(video_path, srt_path, srt_lang, media_type, percent_score, so logging.debug("BAZARR subsync skipped because subtitles score isn't below this " "threshold value: " + subsync_threshold + "%") return False + + +def _get_download_code3(subtitle): + custom = CustomLanguage.from_value(subtitle.language, "language") + if custom is None: + return subtitle.language.alpha3 + return custom.alpha3 + + +def _get_lang_obj(alpha3): + sub = CustomLanguage.from_value(alpha3, "alpha3") + if sub is None: + return Language(alpha3) + + return sub.subzero_language() diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index dccedca29..3ac41c3b6 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -10,6 +10,7 @@ from subliminal_patch import core, search_external_subtitles from subzero.language import Language from gevent import sleep +from custom_lang import CustomLanguage from database import get_profiles_list, get_profile_cutoff, TableEpisodes, TableShows, TableMovies from get_languages import alpha2_from_alpha3, language_from_alpha2, get_language_set from config import settings @@ -64,16 +65,6 @@ def store_subtitles(original_path, reversed_path): logging.exception( "BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(reversed_path)[1], reversed_path)) pass - - brazilian_portuguese = [".pt-br", ".pob", "pb"] - brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"] - simplified_chinese_fuzzy = [u"简", u"双语"] - simplified_chinese = [".chs", ".sc", ".zhs",".zh-hans",".hans",".zh_hans",".zhhans",".gb",".simplified"] - simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", "hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"] - traditional_chinese_fuzzy = [u"繁", u"雙語"] - traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht",".zh-hant",".zhhant",".zh_hant",".hant", ".big5", ".traditional"] - traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced", "hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"] - try: dest_folder = get_subtitle_destination_folder() core.CUSTOM_PATHS = [dest_folder] if dest_folder else [] @@ -86,38 +77,19 @@ def store_subtitles(original_path, reversed_path): elif settings.general.subfolder == "relative": full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder) subtitles = guess_external_subtitles(full_dest_folder_path, subtitles) - except Exception as e: + except Exception: logging.exception("BAZARR unable to index external subtitles.") - pass else: for subtitle, language in subtitles.items(): - subtitle_path = get_external_subtitles_path(reversed_path, subtitle) - if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)): - logging.debug("BAZARR external subtitles detected: " + "pb") - actual_subtitles.append( - [str("pb"), path_mappings.path_replace_reverse(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)): - logging.debug("BAZARR external subtitles detected: " + "pb:forced") - actual_subtitles.append( - [str("pb:forced"), path_mappings.path_replace_reverse(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(simplified_chinese)) or (str(subtitle_path).lower())[:-5] in simplified_chinese_fuzzy: - logging.debug("BAZARR external subtitles detected: " + "zh") - actual_subtitles.append( - [str("zh"), path_mappings.path_replace_reverse(subtitle_path)]) - elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced): - logging.debug("BAZARR external subtitles detected: " + "zh:forced") - actual_subtitles.append( - [str("zh:forced"), path_mappings.path_replace_reverse(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy: - logging.debug("BAZARR external subtitles detected: " + "zt") - actual_subtitles.append( - [str("zt"), path_mappings.path_replace_reverse(subtitle_path)]) - elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced): - logging.debug("BAZARR external subtitles detected: " + "zt:forced") - actual_subtitles.append( - [str("zt:forced"), path_mappings.path_replace_reverse(subtitle_path)]) - elif not language: + if not language: continue + + subtitle_path = get_external_subtitles_path(reversed_path, subtitle) + + custom = CustomLanguage.found_external(subtitle, subtitle_path) + if custom is not None: + actual_subtitles.append([custom, path_mappings.path_replace_reverse(subtitle_path)]) + elif str(language) != 'und': if language.forced: language_str = str(language) @@ -184,19 +156,11 @@ def store_subtitles_movie(original_path, reversed_path): except: logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_language) pass - except Exception as e: + except Exception: logging.exception( "BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(reversed_path)[1], reversed_path)) pass - brazilian_portuguese = [".pt-br", ".pob", "pb"] - brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"] - simplified_chinese_fuzzy = [u"简", u"双语"] - simplified_chinese = [".chs", ".sc", ".zhs",".zh-hans",".hans",".zh_hans",".zhhans",".gb",".simplified"] - simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", "hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"] - traditional_chinese_fuzzy = [u"繁", u"雙語"] - traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht",".zh-hant",".zhhant",".zh_hant",".hant", ".big5", ".traditional"] - traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced", "hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"] try: dest_folder = get_subtitle_destination_folder() or '' core.CUSTOM_PATHS = [dest_folder] if dest_folder else [] @@ -213,27 +177,15 @@ def store_subtitles_movie(original_path, reversed_path): pass else: for subtitle, language in subtitles.items(): - subtitle_path = get_external_subtitles_path(reversed_path, subtitle) - if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)): - logging.debug("BAZARR external subtitles detected: " + "pb") - actual_subtitles.append([str("pb"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)): - logging.debug("BAZARR external subtitles detected: " + "pb:forced") - actual_subtitles.append([str("pb:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(simplified_chinese)) or (str(subtitle_path).lower())[:-5] in simplified_chinese_fuzzy: - logging.debug("BAZARR external subtitles detected: " + "zh") - actual_subtitles.append([str("zh"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced): - logging.debug("BAZARR external subtitles detected: " + "zh:forced") - actual_subtitles.append([str("zh:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy: - logging.debug("BAZARR external subtitles detected: " + "zt") - actual_subtitles.append([str("zt"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced): - logging.debug("BAZARR external subtitles detected: " + "zt:forced") - actual_subtitles.append([str("zt:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)]) - elif not language: + if not language: continue + + subtitle_path = get_external_subtitles_path(reversed_path, subtitle) + custom = CustomLanguage.found_external(subtitle, subtitle_path) + + if custom is not None: + actual_subtitles.append([custom, path_mappings.path_replace_reverse_movie(subtitle_path)]) + elif str(language.basename) != 'und': if language.forced: language_str = str(language) diff --git a/bazarr/utils.py b/bazarr/utils.py index d8f8476fe..4674d5c79 100644 --- a/bazarr/utils.py +++ b/bazarr/utils.py @@ -13,6 +13,7 @@ import stat from whichcraft import which from get_args import args from config import settings, url_sonarr, url_radarr +from custom_lang import CustomLanguage from database import TableHistory, TableHistoryMovie, TableBlacklist, TableBlacklistMovie, TableShowsRootfolder, \ TableMoviesRootfolder from event_handler import event_stream @@ -375,12 +376,11 @@ def delete_subtitles(media_type, language, forced, hi, media_path, subtitles_pat def subtitles_apply_mods(language, subtitle_path, mods): language = alpha3_from_alpha2(language) - if language == 'pob': - lang_obj = Language('por', 'BR') - elif language == 'zht': - lang_obj = Language('zho', 'TW') - else: + custom = CustomLanguage.from_value(language, "alpha3") + if custom is None: lang_obj = Language(language) + else: + lang_obj = custom.subzero_language() sub = Subtitle(lang_obj, mods=mods) with open(subtitle_path, 'rb') as f: diff --git a/frontend/src/Settings/Providers/list.ts b/frontend/src/Settings/Providers/list.ts index 4754086b6..df26f91ba 100644 --- a/frontend/src/Settings/Providers/list.ts +++ b/frontend/src/Settings/Providers/list.ts @@ -23,7 +23,7 @@ export const ProviderList: Readonly = [ password: "", }, }, - { key: "argenteam", description: "Spanish Subtitles Provider" }, + { key: "argenteam", description: "LATAM Spanish Subtitles Provider" }, { key: "assrt", description: "Chinese Subtitles Provider", @@ -130,7 +130,7 @@ export const ProviderList: Readonly = [ name: "Sous-Titres.eu", description: "Mostly French Subtitles Provider", }, - { key: "subdivx", description: "Spanish Subtitles Provider" }, + { key: "subdivx", description: "LATAM Spanish / Spanish Subtitles Provider" }, { key: "subssabbz", name: "Subs.sab.bz", @@ -171,7 +171,7 @@ export const ProviderList: Readonly = [ name: "Subtitulamos.tv", description: "Spanish Subtitles Provider", }, - { key: "sucha", description: "Spanish Subtitles Provider" }, + { key: "sucha", description: "LATAM Spanish Subtitles Provider" }, { key: "supersubtitles" }, { key: "titlovi", @@ -184,7 +184,7 @@ export const ProviderList: Readonly = [ { key: "tusubtitulo", name: "Tusubtitulo.com", - description: "Spanish / English Subtitles Provider for TV Shows", + description: "LATAM Spanish / Spanish / English Subtitles Provider for TV Shows", }, { key: "tvsubtitles", name: "TVSubtitles" }, { key: "wizdom", description: "Wizdom.xyz Subtitles Provider." }, diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index c262c4418..2a31cd81a 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -643,8 +643,6 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen #add simplified/traditional chinese detection simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"] traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"] - FULL_LANGUAGE_LIST.extend(simplified_chinese) - FULL_LANGUAGE_LIST.extend(traditional_chinese) p_root = p_root.replace('zh-TW', 'zht') # remove possible language code for matching @@ -676,7 +674,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen language.forced = forced language.hi = hi elif any(ext in str(language_code) for ext in traditional_chinese): - language = Language.fromietf('zh') + language = Language.fromietf('zh') language.forced = forced language.hi = hi else: diff --git a/libs/subliminal_patch/language.py b/libs/subliminal_patch/language.py index b001bf5d1..97337c1e7 100644 --- a/libs/subliminal_patch/language.py +++ b/libs/subliminal_patch/language.py @@ -21,10 +21,12 @@ class PatchedOpenSubtitlesConverter(OpenSubtitlesConverter): self.to_opensubtitles.update({ ('srp', None, "Latn"): 'scc', ('srp', None, "Cyrl"): 'scc', - ('chi', None, 'Hant'): 'zht' + ('chi', None, 'Hant'): 'zht', + ('spa', 'MX'): 'spl', }) self.from_opensubtitles.update({ - 'zht': ('zho', None, 'Hant') + 'zht': ('zho', None, 'Hant'), + 'spl': ('spa', 'MX'), }) def convert(self, alpha3, country=None, script=None): diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py index a02f71308..679877e96 100644 --- a/libs/subliminal_patch/providers/argenteam.py +++ b/libs/subliminal_patch/providers/argenteam.py @@ -46,7 +46,8 @@ class ArgenteamSubtitle(Subtitle): class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): provider_name = "argenteam" - languages = {Language.fromalpha2(l) for l in ["es"]} + # Safe to assume every subtitle from Argenteam is Latam Spanish + languages = {Language("spa", "MX")} video_types = (Episode, Movie) subtitle_class = ArgenteamSubtitle hearing_impaired_verifiable = False @@ -59,9 +60,9 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): def initialize(self): self.session = Session() - self.session.headers = { - "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2") - } + self.session.headers.update( + {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} + ) def terminate(self): self.session.close() @@ -75,48 +76,38 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): is_episode = True query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}" - logger.info(f"Searching ID (episode: {is_episode}) for {query}") + logger.debug(f"Searching ID (episode: {is_episode}) for {query}") r = self.session.get(API_URL + "search", params={"q": query}, timeout=10) r.raise_for_status() results = r.json() match_ids = [] - if results["total"] >= 1: - for result in results["results"]: - if (result["type"] == "episode" and not is_episode) or ( - result["type"] == "movie" and is_episode - ): + for result in results["results"]: + if result["type"] == "movie" and is_episode: + continue + + imdb = f"tt{result.get('imdb', 'n/a')}" + if not is_episode and imdb == kwargs.get("imdb_id"): + logger.debug("Movie matched by IMDB ID, taking shortcut") + match_ids = [result["id"]] + break + + # advanced title check in case of multiple movie results + title_year = kwargs.get("year") and kwargs.get("title") + if results["total"] > 1 and not is_episode and title_year: + sanitized = sanitize(result["title"]) + titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles] + if sanitized not in titles: continue - # shortcut in case of matching imdb id (don't match NoneType) - if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get( - "imdb_id" - ): - logger.debug(f"Movie matched by IMDB ID, taking shortcut") - match_ids = [result["id"]] - break - - # advanced title check in case of multiple movie results - if results["total"] > 1: - if not is_episode and kwargs.get("year"): - if result["title"] and not ( - sanitize(result["title"]) - in ( - "%s %s" % (sanitize(name), kwargs.get("year")) - for name in titles - ) - ): - continue - - match_ids.append(result["id"]) - else: - logger.error(f"No episode ID found for {query}") + match_ids.append(result["id"]) if match_ids: - logger.debug( - f"Found matching IDs: {', '.join(str(id) for id in match_ids)}" - ) + ids = ", ".join(str(id) for id in match_ids) + logger.debug("Found matching IDs: %s", ids) + else: + logger.debug("Nothing found from %s query", query) return match_ids diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index c66c5a0a8..dca0741d0 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -24,7 +24,7 @@ from subliminal_patch.providers import Provider from guessit import guessit -CLEAN_TITLE_RES = [ +_CLEAN_TITLE_RES = [ (r"subt[ií]tulos de", ""), (r"´|`", "'"), (r" {2,}", " "), @@ -82,7 +82,7 @@ class SubdivxSubtitle(Subtitle): class SubdivxSubtitlesProvider(Provider): provider_name = "subdivx" hash_verifiable = False - languages = {Language.fromalpha2(lang) for lang in ["es"]} + languages = {Language("spa", "MX")} | {Language.fromalpha2("es")} subtitle_class = SubdivxSubtitle server_url = "https://www.subdivx.com/" @@ -176,22 +176,28 @@ class SubdivxSubtitlesProvider(Provider): for subtitle in range(0, len(title_soups)): title_soup, body_soup = title_soups[subtitle], body_soups[subtitle] - # title title = self._clean_title(title_soup.find("a").text) - # filter by year if video.year and str(video.year) not in title: continue - page_link = title_soup.find("a")["href"] + # Data + datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text + # Ignore multi-disc and non-srt subtitles + if not any(item in datos for item in ("Cds: 1", "SubRip")): + continue + + spain = "/pais/7.gif" in datos + language = Language.fromalpha2("es") if spain else Language("spa", "MX") # description - description = body_soup.find("div", {"id": "buscador_detalle_sub"}).text - description = description.replace(",", " ").lower() + sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text + description = sub_details.replace(",", " ").lower() # uploader uploader = body_soup.find("a", {"class": "link1"}).text + page_link = title_soup.find("a")["href"] subtitle = self.subtitle_class( language, video, page_link, title, description, uploader @@ -228,7 +234,7 @@ class SubdivxSubtitlesProvider(Provider): Normalize apostrophes and spaces to avoid matching problems (e.g. Subtitulos de Carlito´s Way -> Carlito's Way) """ - for og, new in CLEAN_TITLE_RES: + for og, new in _CLEAN_TITLE_RES: title = re.sub(og, new, title, flags=re.IGNORECASE) return title diff --git a/libs/subliminal_patch/providers/sucha.py b/libs/subliminal_patch/providers/sucha.py index 073935105..18d965067 100644 --- a/libs/subliminal_patch/providers/sucha.py +++ b/libs/subliminal_patch/providers/sucha.py @@ -17,8 +17,8 @@ from subzero.language import Language logger = logging.getLogger(__name__) -SERVER_URL = "http://sapidb.caretas.club/" -PAGE_URL = "https://sucha.caretas.club/" +SERVER_URL = "http://sapidb.caretas.club" +PAGE_URL = "https://sucha.caretas.club" UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.") @@ -53,41 +53,40 @@ class SuchaSubtitle(Subtitle): return self.download_id def get_matches(self, video): + type_ = "episode" if isinstance(video, Episode) else "movie" self.found_matches |= guess_matches( video, - guessit( - self.filename, - {"type": "episode" if isinstance(video, Episode) else "movie"}, - ), + guessit(self.filename, {"type": type_}), ) self.found_matches |= guess_matches( video, - guessit( - self.guessed_release_info, - {"type": "episode" if isinstance(video, Episode) else "movie"}, - ), + guessit(self.guessed_release_info, {"type": type_}), ) return self.found_matches class SuchaProvider(Provider): """Sucha Provider""" - languages = {Language.fromalpha2(l) for l in ["es"]} + + # This is temporary. Castilian spanish subtitles may exist, but are rare + # and currently impossible to guess from the API. + languages = {Language("spa", "MX")} language_list = list(languages) video_types = (Episode, Movie) def initialize(self): self.session = Session() - self.session.headers = { - "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2") - } + self.session.headers.update( + {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} + ) def terminate(self): self.session.close() def query(self, languages, video): - movie_year = video.year if video.year else "0" + movie_year = video.year or "0" is_episode = isinstance(video, Episode) + type_str = "episode" if is_episode else "movie" language = self.language_list[0] if is_episode: @@ -96,43 +95,37 @@ class SuchaProvider(Provider): q = {"query": video.title, "year": movie_year} logger.debug(f"Searching subtitles: {q}") - result = self.session.get( - SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10 - ) + result = self.session.get(f"{SERVER_URL}/{type_str}", params=q, timeout=10) result.raise_for_status() - result_ = result.json() + results = result.json() subtitles = [] - for i in result_: + for item in results: matches = set() - try: - if ( - video.title.lower() in i["title"].lower() - or video.title.lower() in i["alt_title"].lower() - ): - matches.add("title") - except TypeError: + title = item.get("title", "").lower() + alt_title = item.get("alt_title", title).lower() + if not title: logger.debug("No subtitles found") return [] - if is_episode: - if ( - q["query"].lower() in i["title"].lower() - or q["query"].lower() in i["alt_title"].lower() - ): - matches_ = ("title", "series", "season", "episode", "year") - [matches.add(match) for match in matches_] + if any(video.title.lower() in item for item in (title, alt_title)): + matches.add("title") - if str(i["year"]) == video.year: + if str(item["year"]) == video.year: matches.add("year") + if is_episode and any( + q["query"].lower() in item for item in (title, alt_title) + ): + matches.update("title", "series", "season", "episode", "year") + subtitles.append( SuchaSubtitle( language, - i["release"], - i["filename"], - str(i["id"]), - "episode" if is_episode else "movie", + item["release"], + item["filename"], + str(item["id"]), + type_str, matches, ) ) @@ -141,10 +134,6 @@ class SuchaProvider(Provider): def list_subtitles(self, video, languages): return self.query(languages, video) - def _check_response(self, response): - if response.status_code != 200: - raise ServiceUnavailable(f"Bad status code: {response.status_code}") - def _get_archive(self, content): archive_stream = io.BytesIO(content) @@ -177,12 +166,11 @@ class SuchaProvider(Provider): def download_subtitle(self, subtitle): logger.info("Downloading subtitle %r", subtitle) response = self.session.get( - SERVER_URL + "download", + f"{SERVER_URL}/download", params={"id": subtitle.download_id, "type": subtitle.download_type}, timeout=10, ) response.raise_for_status() - self._check_response(response) archive = self._get_archive(response.content) subtitle_file = self.get_file(archive) subtitle.content = fix_line_ending(subtitle_file) diff --git a/libs/subliminal_patch/providers/tusubtitulo.py b/libs/subliminal_patch/providers/tusubtitulo.py index 2dc9c2e95..6b4970b59 100644 --- a/libs/subliminal_patch/providers/tusubtitulo.py +++ b/libs/subliminal_patch/providers/tusubtitulo.py @@ -57,7 +57,9 @@ class TuSubtituloSubtitle(Subtitle): class TuSubtituloProvider(Provider): """TuSubtitulo.com Provider""" - languages = {Language.fromietf(lang) for lang in ["en", "es"]} + languages = {Language.fromietf(lang) for lang in ["en", "es"]} | { + Language("spa", "MX") + } logger.debug(languages) video_types = (Episode,) @@ -123,11 +125,13 @@ class TuSubtituloProvider(Provider): try: content = tables[tr + inc].find_all("td") - language = content[4].text - if "eng" in language.lower(): - language = "en" - elif "esp" in language.lower(): - language = "es" + language = content[4].text.lower() + if "eng" in language: + language = Language.fromietf("en") + elif "lat" in language: + language = Language("spa", "MX") + elif "esp" in language: + language = Language.fromietf("es") else: language = None @@ -236,7 +240,7 @@ class TuSubtituloProvider(Provider): matches.update(["title", "series", "season", "episode", "year"]) subtitles.append( TuSubtituloSubtitle( - Language.fromietf(sub["language"]), + sub["language"], sub, matches, ) diff --git a/libs/subzero/language.py b/libs/subzero/language.py index 43dc29754..199856704 100644 --- a/libs/subzero/language.py +++ b/libs/subzero/language.py @@ -32,10 +32,15 @@ repl_map = { "tib": "bo", } +CUSTOM_LIST = ["chs", "sc", "zhs", "hans", "gb", u"简", u"双语", + "cht", "tc", "zht", "hant", "big5", u"繁", u"雙語", + "spl", "ea", "pob", "pb"] + ALPHA2_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha2, LANGUAGE_MATRIX)))) + list(repl_map.values()) ALPHA3b_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha3, LANGUAGE_MATRIX)))) + \ list(set(filter(lambda x: len(x) == 3, list(repl_map.keys())))) FULL_LANGUAGE_LIST = ALPHA2_LIST + ALPHA3b_LIST +FULL_LANGUAGE_LIST.extend(CUSTOM_LIST) def language_from_stream(l):