diff --git a/bazarr/app/config.py b/bazarr/app/config.py
index aebdf5dc3..b0a8c62ba 100644
--- a/bazarr/app/config.py
+++ b/bazarr/app/config.py
@@ -300,6 +300,12 @@ validators = [
     # analytics section
     Validator('analytics.enabled', must_exist=True, default=True, is_type_of=bool),
+
+    # jimaku section
+    Validator('jimaku.api_key', must_exist=True, default='', is_type_of=str),
+    Validator('jimaku.enable_name_search_fallback', must_exist=True, default=True, is_type_of=bool),
+    Validator('jimaku.enable_archives_download', must_exist=True, default=False, is_type_of=bool),
+    Validator('jimaku.enable_ai_subs', must_exist=True, default=False, is_type_of=bool),
 
     # titlovi section
     Validator('titlovi.username', must_exist=True, default='', is_type_of=str, cast=str),
diff --git a/bazarr/app/get_providers.py b/bazarr/app/get_providers.py
index b9ce975ff..fe1445497 100644
--- a/bazarr/app/get_providers.py
+++ b/bazarr/app/get_providers.py
@@ -285,6 +285,12 @@ def get_providers_auth():
             'username': settings.titlovi.username,
             'password': settings.titlovi.password,
         },
+        'jimaku': {
+            'api_key': settings.jimaku.api_key,
+            'enable_name_search_fallback': settings.jimaku.enable_name_search_fallback,
+            'enable_archives_download': settings.jimaku.enable_archives_download,
+            'enable_ai_subs': settings.jimaku.enable_ai_subs,
+        },
         'ktuvit': {
             'email': settings.ktuvit.email,
             'hashed_password': settings.ktuvit.hashed_password,
diff --git a/bazarr/subtitles/refiners/__init__.py b/bazarr/subtitles/refiners/__init__.py
index ff1e715a0..9fbdecbb2 100644
--- a/bazarr/subtitles/refiners/__init__.py
+++ b/bazarr/subtitles/refiners/__init__.py
@@ -4,10 +4,12 @@ from .ffprobe import refine_from_ffprobe
 from .database import refine_from_db
 from .arr_history import refine_from_arr_history
 from .anidb import refine_from_anidb
+from .anilist import refine_from_anilist
 
 registered = {
     "database": refine_from_db,
     "ffprobe": refine_from_ffprobe,
     "arr_history": refine_from_arr_history,
     "anidb": refine_from_anidb,
+    "anilist": refine_from_anilist,  # Must run AFTER AniDB
 }
diff --git a/bazarr/subtitles/refiners/anidb.py b/bazarr/subtitles/refiners/anidb.py
index c680bba5a..5faa9878a 100644
--- a/bazarr/subtitles/refiners/anidb.py
+++ b/bazarr/subtitles/refiners/anidb.py
@@ -20,7 +20,10 @@ except ImportError:
 except ImportError:
     import xml.etree.ElementTree as etree
 
-refined_providers = {'animetosho'}
+refined_providers = {'animetosho', 'jimaku'}
+providers_requiring_anidb_api = {'animetosho'}
+
+logger = logging.getLogger(__name__)
 
 api_url = 'http://api.anidb.net:9001/httpapi'
 
@@ -40,6 +43,10 @@ class AniDBClient(object):
     @property
     def is_throttled(self):
         return self.cache and self.cache.get('is_throttled')
+
+    @property
+    def has_api_credentials(self):
+        return self.api_client_key is not None and self.api_client_key != ''
 
     @property
     def daily_api_request_count(self):
@@ -62,7 +69,9 @@ class AniDBClient(object):
         return r.content
 
     @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
-    def get_series_id(self, mappings, tvdb_series_season, tvdb_series_id, episode):
+    def get_show_information(self, tvdb_series_id, tvdb_series_season, episode):
+        mappings = etree.fromstring(self.get_series_mappings())
+
         # Enrich the collection of anime with the episode offset
         animes = [
             self.AnimeInfo(anime, int(anime.attrib.get('episodeoffset', 0)))
@@ -71,49 +80,60 @@
             for anime in mappings.findall(
                 f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='{tvdb_series_season}']"
             )
         ]
 
+        is_special_entry = False
         if not animes:
-            return None, None
+            # Some entries will store TVDB seasons in a nested mapping list, identifiable by the value 'a' as the season
+            special_entries = mappings.findall(
+                f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='a']"
+            )
-        # Sort the anime by offset in ascending order
-        animes.sort(key=lambda a: a.episode_offset)
+            if not special_entries:
+                return None, None, None
-        # Different from Tvdb, Anidb have different ids for the Parts of a season
-        anidb_id = None
-        offset = 0
+            is_special_entry = True
+            anidb_id = None
+            offset = 0
+            for special_entry in special_entries:
+                mapping_list = special_entry.findall(f".//mapping[@tvdbseason='{tvdb_series_season}']")
+                if len(mapping_list) > 0:
+                    anidb_id = int(special_entry.attrib.get('anidbid'))
+                    offset = int(mapping_list[0].attrib.get('offset', 0))
-        for index, anime_info in enumerate(animes):
-            anime, episode_offset = anime_info
+        if not is_special_entry:
+            # Sort the anime by offset in ascending order
+            animes.sort(key=lambda a: a.episode_offset)
-            mapping_list = anime.find('mapping-list')
+            # Unlike TVDB, AniDB uses different IDs for the parts of a season
+            anidb_id = None
+            offset = 0
-            # Handle mapping list for Specials
-            if mapping_list:
-                for mapping in mapping_list.findall("mapping"):
-                    # Mapping values are usually like ;1-1;2-1;3-1;
-                    for episode_ref in mapping.text.split(';'):
-                        if not episode_ref:
-                            continue
+            for index, anime_info in enumerate(animes):
+                anime, episode_offset = anime_info
+
+                mapping_list = anime.find('mapping-list')
-                        anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
-                        if tvdb_episode == episode:
-                            anidb_id = int(anime.attrib.get('anidbid'))
+                # Handle mapping list for Specials
+                if mapping_list:
+                    for mapping in mapping_list.findall("mapping"):
+                        # Mapping values are usually like ;1-1;2-1;3-1;
+                        for episode_ref in mapping.text.split(';'):
+                            if not episode_ref:
+                                continue
-                            return anidb_id, anidb_episode
+                            anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
+                            if tvdb_episode == episode:
+                                anidb_id = int(anime.attrib.get('anidbid'))
-        if episode > episode_offset:
-            anidb_id = int(anime.attrib.get('anidbid'))
-            offset = episode_offset
+                                return anidb_id, anidb_episode, 0
-        return anidb_id, episode - offset
+                if episode > episode_offset:
+                    anidb_id = int(anime.attrib.get('anidbid'))
+                    offset = episode_offset
+
+        return anidb_id, episode - offset, offset
 
     @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
-    def get_series_episodes_ids(self, tvdb_series_id, season, episode):
-        mappings = etree.fromstring(self.get_series_mappings())
-
-        series_id, episode_no = self.get_series_id(mappings, season, tvdb_series_id, episode)
-
+    def get_episode_ids(self, series_id, episode_no):
         if not series_id:
-            return None, None
+            return None
 
         episodes = etree.fromstring(self.get_episodes(series_id))
 
@@ -177,7 +197,7 @@ class AniDBClient(object):
 
 def refine_from_anidb(path, video):
     if not isinstance(video, Episode) or not video.series_tvdb_id:
-        logging.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
+        logger.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
         return
 
@@ -190,27 +210,35 @@ def refine_anidb_ids(video):
 
     season = video.season if video.season else 0
 
-    if anidb_client.is_throttled:
-        logging.warning(f'API daily limit reached. Skipping refinement for {video.series}')
-
-        return video
-
-    try:
-        anidb_series_id, anidb_episode_id = anidb_client.get_series_episodes_ids(
-            video.series_tvdb_id,
-            season, video.episode,
-        )
-    except TooManyRequests:
-        logging.error(f'API daily limit reached while refining {video.series}')
-
-        anidb_client.mark_as_throttled()
-
-        return video
-
-    if not anidb_episode_id:
-        logging.error(f'Could not find anime series {video.series}')
-
+    anidb_series_id, anidb_episode_no, anidb_season_episode_offset = anidb_client.get_show_information(
+        video.series_tvdb_id,
+        season,
+        video.episode,
+    )
+
+    if not anidb_series_id:
+        logger.error(f'Could not find anime series {video.series}')
         return video
+
+    anidb_episode_id = None
+    if anidb_client.has_api_credentials:
+        if anidb_client.is_throttled:
+            logger.warning(f'API daily limit reached. Skipping episode ID refinement for {video.series}')
+        else:
+            try:
+                anidb_episode_id = anidb_client.get_episode_ids(
+                    anidb_series_id,
+                    anidb_episode_no
+                )
+            except TooManyRequests:
+                logger.error(f'API daily limit reached while refining {video.series}')
+                anidb_client.mark_as_throttled()
+    else:
+        intersect = providers_requiring_anidb_api.intersection(settings.general.enabled_providers)
+        if intersect:
+            logger.warning(f'AniDB API credentials are not fully set up; the following providers may not work: {intersect}')
 
     video.series_anidb_id = anidb_series_id
     video.series_anidb_episode_id = anidb_episode_id
+    video.series_anidb_episode_no = anidb_episode_no
+    video.series_anidb_season_episode_offset = anidb_season_episode_offset
diff --git a/bazarr/subtitles/refiners/anilist.py b/bazarr/subtitles/refiners/anilist.py
new file mode 100644
index 000000000..3d0bb7b35
--- /dev/null
+++ b/bazarr/subtitles/refiners/anilist.py
@@ -0,0 +1,77 @@
+# coding=utf-8
+# fmt: off
+
+import logging
+import requests
+from datetime import timedelta
+
+from app.config import settings
+from subliminal import Episode, region, __short_version__
+
+logger = logging.getLogger(__name__)
+refined_providers = {'jimaku'}
+
+class AniListClient(object):
+    def __init__(self, session=None, timeout=10):
+        self.session = session or requests.Session()
+        self.session.timeout = timeout
+        self.session.headers['Content-Type'] = 'application/json'
+        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
+
+    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
+    def get_series_mappings(self):
+        r = self.session.get(
+            'https://raw.githubusercontent.com/Fribb/anime-lists/master/anime-list-mini.json'
+        )
+
+        r.raise_for_status()
+        return r.json()
+
+    def get_series_id(self, candidate_id_name, candidate_id_value):
+        anime_list = self.get_series_mappings()
+
+        tag_map = {
+            "series_anidb_id": "anidb_id",
+            "imdb_id": "imdb_id"
+        }
+        mapped_tag = tag_map.get(candidate_id_name, candidate_id_name)
+
+        obj = [obj for obj in anime_list if mapped_tag in obj and str(obj[mapped_tag]) == str(candidate_id_value)]
+        logger.debug(f"Based on '{mapped_tag}': '{candidate_id_value}', anime-list matched: {obj}")
+
+        if len(obj) > 0:
+            return obj[0]["anilist_id"]
+        else:
+            logger.debug(f"Could not find a corresponding AniList ID with '{mapped_tag}': {candidate_id_value}")
+            return None
+
+def refine_from_anilist(path, video):
+    # Safety checks
+    if isinstance(video, Episode):
+        if not video.series_anidb_id:
+            logger.error(f"Will not refine '{video.series}' as it does not have an AniDB ID.")
+            return
+
+    if refined_providers.intersection(settings.general.enabled_providers) and video.anilist_id is None:
+        refine_anilist_ids(video)
+
+def refine_anilist_ids(video):
+    anilist_client = AniListClient()
+
+    if isinstance(video, Episode):
+        candidate_id_name = "series_anidb_id"
+    else:
+        candidate_id_name = "imdb_id"
+
+    candidate_id_value = getattr(video, candidate_id_name, None)
+    if not candidate_id_value:
+        logger.error(f"Found no value for property {candidate_id_name} of video.")
+        return video
+
+    anilist_id = anilist_client.get_series_id(candidate_id_name, candidate_id_value)
+    if not anilist_id:
+        return video
+
+    video.anilist_id = anilist_id
\ No newline at end of file
diff --git a/custom_libs/subliminal/video.py b/custom_libs/subliminal/video.py
index 2168d91a9..66c090945 100644
--- a/custom_libs/subliminal/video.py
+++ b/custom_libs/subliminal/video.py
@@ -130,7 +130,8 @@ class Episode(Video):
     """
     def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
                  series_tvdb_id=None, series_imdb_id=None, alternative_series=None, series_anidb_id=None,
-                 series_anidb_episode_id=None, **kwargs):
+                 series_anidb_episode_id=None, series_anidb_season_episode_offset=None,
+                 anilist_id=None, **kwargs):
         super(Episode, self).__init__(name, **kwargs)
 
         #: Series of the episode
@@ -163,8 +164,11 @@ class Episode(Video):
         #: Alternative names of the series
         self.alternative_series = alternative_series or []
 
+        #: Anime-specific information
         self.series_anidb_episode_id = series_anidb_episode_id
         self.series_anidb_id = series_anidb_id
+        self.series_anidb_season_episode_offset = series_anidb_season_episode_offset
+        self.anilist_id = anilist_id
 
     @classmethod
     def fromguess(cls, name, guess):
@@ -207,10 +211,11 @@ class Movie(Video):
     :param str title: title of the movie.
     :param int year: year of the movie.
     :param list alternative_titles: alternative titles of the movie
+    :param int anilist_id: AniList ID of the movie (if anime)
     :param \*\*kwargs: additional parameters for the :class:`Video` constructor.
""" - def __init__(self, name, title, year=None, alternative_titles=None, **kwargs): + def __init__(self, name, title, year=None, alternative_titles=None, anilist_id=None, **kwargs): super(Movie, self).__init__(name, **kwargs) #: Title of the movie @@ -221,6 +226,9 @@ class Movie(Video): #: Alternative titles of the movie self.alternative_titles = alternative_titles or [] + + #: AniList ID of the movie + self.anilist_id = anilist_id @classmethod def fromguess(cls, name, guess): diff --git a/custom_libs/subliminal_patch/providers/jimaku.py b/custom_libs/subliminal_patch/providers/jimaku.py new file mode 100644 index 000000000..68393821d --- /dev/null +++ b/custom_libs/subliminal_patch/providers/jimaku.py @@ -0,0 +1,419 @@ +from __future__ import absolute_import + +from datetime import timedelta +import logging +import os +import re +import time + +from requests import Session +from subliminal import region, __short_version__ +from subliminal.cache import REFINER_EXPIRATION_TIME +from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable +from subliminal.utils import sanitize +from subliminal.video import Episode, Movie +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import Subtitle +from subliminal_patch.exceptions import APIThrottled +from subliminal_patch.providers.utils import get_subtitle_from_archive, get_archive_from_bytes +from urllib.parse import urlencode, urljoin +from guessit import guessit +from subzero.language import Language, FULL_LANGUAGE_LIST + +logger = logging.getLogger(__name__) + +# Unhandled formats, such files will always get filtered out +unhandled_archive_formats = (".7z",) +accepted_archive_formats = (".zip", ".rar") + +class JimakuSubtitle(Subtitle): + '''Jimaku Subtitle.''' + provider_name = 'jimaku' + + hash_verifiable = False + + def __init__(self, language, video, download_url, filename): + super(JimakuSubtitle, self).__init__(language, page_link=download_url) + + self.video = video + self.download_url = download_url + self.filename = filename + self.release_info = filename + self.is_archive = filename.endswith(accepted_archive_formats) + + @property + def id(self): + return self.download_url + + def get_matches(self, video): + matches = set() + + # Episode/Movie specific matches + if isinstance(video, Episode): + if sanitize(video.series) and sanitize(self.video.series) in ( + sanitize(name) for name in [video.series] + video.alternative_series): + matches.add('series') + + if video.season and self.video.season is None or video.season and video.season == self.video.season: + matches.add('season') + elif isinstance(video, Movie): + if sanitize(video.title) and sanitize(self.video.title) in ( + sanitize(name) for name in [video.title] + video.alternative_titles): + matches.add('title') + + # General matches + if video.year and video.year == self.video.year: + matches.add('year') + + video_type = 'movie' if isinstance(video, Movie) else 'episode' + matches.add(video_type) + + guess = guessit(self.filename, {'type': video_type}) + for g in guess: + if g[0] == "release_group" or "source": + if video.release_group == g[1]: + matches.add('release_group') + break + + # Prioritize .srt by repurposing the audio_codec match + if self.filename.endswith(".srt"): + matches.add('audio_codec') + + return matches + +class JimakuProvider(Provider): + '''Jimaku Provider.''' + video_types = (Episode, Movie) + + api_url = 'https://jimaku.cc/api' + api_ratelimit_max_delay_seconds = 5 + api_ratelimit_backoff_limit 
= 3 + + corrupted_file_size_threshold = 500 + + languages = {Language.fromietf("ja")} + + def __init__(self, enable_name_search_fallback, enable_archives_download, enable_ai_subs, api_key): + if api_key: + self.api_key = api_key + else: + raise ConfigurationError('Missing api_key.') + + self.enable_name_search_fallback = enable_name_search_fallback + self.download_archives = enable_archives_download + self.enable_ai_subs = enable_ai_subs + self.session = None + + def initialize(self): + self.session = Session() + self.session.headers['Content-Type'] = 'application/json' + self.session.headers['Authorization'] = self.api_key + self.session.headers['User-Agent'] = os.environ.get("SZ_USER_AGENT") + + def terminate(self): + self.session.close() + + def _query(self, video): + if isinstance(video, Movie): + media_name = video.title.lower() + elif isinstance(video, Episode): + media_name = video.series.lower() + + # With entries that have a season larger than 1, Jimaku appends the corresponding season number to the name. + # We'll reassemble media_name here to account for cases where we can only search by name alone. + season_addendum = str(video.season) if video.season > 1 else None + media_name = f"{media_name} {season_addendum}" if season_addendum else media_name + + # Search for entry + searching_for_entry_attempts = 0 + additional_url_params = {} + while searching_for_entry_attempts < 2: + searching_for_entry_attempts += 1 + url = self._assemble_jimaku_search_url(video, media_name, additional_url_params) + if not url: + return None + + searching_for_entry = "query" in url + data = self._search_for_entry(url) + + if not data: + if searching_for_entry and searching_for_entry_attempts < 2: + logger.info("Maybe this is live action media? Will retry search without anime parameter...") + additional_url_params = {'anime': "false"} + else: + return None + else: + break + + # We only go for the first entry + entry = data[0] + + entry_id = entry.get('id') + anilist_id = entry.get('anilist_id', None) + entry_name = entry.get('name') + is_movie = entry.get('flags', {}).get('movie', False) + + if isinstance(video, Episode) and is_movie: + logger.warn("Bazarr thinks this is a series, but Jimaku says this is a movie! 
+
+        logger.info(f"Matched entry: ID: '{entry_id}', anilist_id: '{anilist_id}', name: '{entry_name}', english_name: '{entry.get('english_name')}', movie: {is_movie}")
+        if entry.get("flags", {}).get("unverified"):
+            logger.warning(f"This entry '{entry_id}' is unverified, subtitles might be incomplete or have quality issues!")
+
+        # Get a list of subtitles for entry
+        episode_number = getattr(video, 'episode', None)
+        url_params = {'episode': episode_number} if isinstance(video, Episode) and not is_movie else {}
+        only_look_for_archives = False
+
+        has_offset = isinstance(video, Episode) and video.series_anidb_season_episode_offset is not None
+
+        retry_count = 0
+        adjusted_ep_num = None
+        while retry_count <= 1:
+            # Account for a positive episode offset first
+            if isinstance(video, Episode) and not is_movie and retry_count < 1:
+                if video.season > 1 and has_offset:
+                    offset_value = abs(video.series_anidb_season_episode_offset)
+
+                    if episode_number < offset_value:
+                        adjusted_ep_num = episode_number + offset_value
+                        logger.warning(f"Will try using adjusted episode number {adjusted_ep_num} first")
+                        url_params = {'episode': adjusted_ep_num}
+
+            url = f"entries/{entry_id}/files"
+            data = self._search_for_subtitles(url, url_params)
+
+            if not data:
+                if isinstance(video, Episode) and not is_movie and has_offset and retry_count < 1:
+                    logger.warning(f"Found no subtitles for the adjusted episode number, but will retry with the normal episode number {episode_number}")
+                    url_params = {'episode': episode_number}
+                elif isinstance(video, Episode) and not is_movie and retry_count < 1:
+                    logger.warning(f"Found no subtitles for episode number {episode_number}, but will retry without the 'episode' parameter")
+                    url_params = {}
+                    only_look_for_archives = True
+                else:
+                    return None
+
+                retry_count += 1
+            else:
+                if adjusted_ep_num:
+                    video.episode = adjusted_ep_num
+                    logger.debug(f"This video's episode attribute has been updated to: {video.episode}")
+                break
+
+        # Filter subtitles
+        list_of_subtitles = []
+
+        data = [item for item in data if not item['name'].endswith(unhandled_archive_formats)]
+
+        # Detect when only archives have been uploaded
+        archive_entries = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+        subtitle_entries = [item for item in data if not item['name'].endswith(accepted_archive_formats)]
+        has_only_archives = len(archive_entries) > 0 and len(subtitle_entries) == 0
+        if has_only_archives:
+            logger.warning("Have only found archived subtitles")
+
+        elif only_look_for_archives:
+            data = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+
+        for item in data:
+            filename = item.get('name')
+            download_url = item.get('url')
+            is_archive = filename.endswith(accepted_archive_formats)
+
+            # Archives will still be considered if they're the only files available, as is mostly the case for movies.
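+            # Otherwise they are skipped, unless the user opted in via enable_archives_download.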
+            if is_archive and not has_only_archives and not self.download_archives:
+                logger.warning(f"Skipping archive '{filename}' because normal subtitles are available instead")
+                continue
+
+            if not self.enable_ai_subs:
+                p = re.compile(r'[\[\(]?(whisperai)[\]\)]?|[\[\(]whisper[\]\)]', re.IGNORECASE)
+                if p.search(filename):
+                    logger.warning(f"Skipping subtitle '{filename}' as it's suspected of being AI generated")
+                    continue
+
+            sub_languages = self._try_determine_subtitle_languages(filename)
+            if len(sub_languages) > 1:
+                logger.warning(f"Skipping subtitle '{filename}' as it's suspected of containing multiple languages")
+                continue
+
+            # Check if the file is obviously corrupt. If no size is returned, assume it's OK
+            filesize = item.get('size', self.corrupted_file_size_threshold)
+            if filesize < self.corrupted_file_size_threshold:
+                logger.warning(f"Skipping possibly corrupt file '{filename}': Filesize is just {filesize} bytes")
+                continue
+
+            if not filename.endswith(unhandled_archive_formats):
+                lang = sub_languages[0] if sub_languages else Language("jpn")
+                list_of_subtitles.append(JimakuSubtitle(lang, video, download_url, filename))
+            else:
+                logger.debug(f"Skipping archive '{filename}' as it's not a supported format")
+
+        return list_of_subtitles
+
+    def list_subtitles(self, video, languages=None):
+        subtitles = self._query(video)
+        if not subtitles:
+            return []
+
+        return subtitles
+
+    def download_subtitle(self, subtitle: JimakuSubtitle):
+        target_url = subtitle.download_url
+        response = self.session.get(target_url, timeout=10)
+        response.raise_for_status()
+
+        if subtitle.is_archive:
+            archive = get_archive_from_bytes(response.content)
+            if archive:
+                if isinstance(subtitle.video, Episode):
+                    subtitle.content = get_subtitle_from_archive(
+                        archive,
+                        episode=subtitle.video.episode,
+                        episode_title=subtitle.video.title
+                    )
+                else:
+                    subtitle.content = get_subtitle_from_archive(
+                        archive
+                    )
+            else:
+                logger.warning("Archive seems to not be an archive! File possibly corrupt?")
+                return None
+        else:
+            subtitle.content = response.content
+
+    def _do_jimaku_request(self, url_path, url_params={}):
+        url = urljoin(f"{self.api_url}/{url_path}", '?' + urlencode(url_params))
+
+        retry_count = 0
+        while retry_count < self.api_ratelimit_backoff_limit:
+            response = self.session.get(url, timeout=10)
+
+            if response.status_code == 429:
+                reset_time = self.api_ratelimit_max_delay_seconds
+                retry_count += 1
+
+                logger.warning(f"Jimaku ratelimit hit, waiting for '{reset_time}' seconds ({retry_count}/{self.api_ratelimit_backoff_limit} tries)")
+                time.sleep(reset_time)
+                continue
+            elif response.status_code == 401:
+                raise AuthenticationError("Unauthorized. API key possibly invalid")
+            else:
+                response.raise_for_status()
+
+            data = response.json()
+            logger.debug(f"Length of response on {url}: {len(data)}")
+            if len(data) == 0:
+                logger.error(f"Jimaku returned no items for our query: {url}")
+                return None
+            elif 'error' in data:
+                raise ServiceUnavailable(f"Jimaku returned an error: '{data.get('error')}', Code: '{data.get('code')}'")
+            else:
+                return data
+
+        raise APIThrottled(f"Jimaku ratelimit max backoff limit of {self.api_ratelimit_backoff_limit} reached, aborting")
+
+    # Wrapper functions to indirectly call _do_jimaku_request with different cache configs
+    @region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
+    def _search_for_entry(self, url_path, url_params={}):
+        return self._do_jimaku_request(url_path, url_params)
+
+    @region.cache_on_arguments(expiration_time=timedelta(minutes=1).total_seconds())
+    def _search_for_subtitles(self, url_path, url_params={}):
+        return self._do_jimaku_request(url_path, url_params)
+
+    @staticmethod
+    def _try_determine_subtitle_languages(filename):
+        # This is only a heuristic and not a 100% foolproof way of detecting multi-language subs:
+        # It assumes that language codes, if present, are in the last metadata group of the sub's filename.
+        # If no such codes are present, or none can be matched, we'll just assume the sub is purely Japanese.
+        default_language = Language("jpn")
+
+        dot_delimit = filename.split(".")
+        bracket_delimit = re.split(r'[\[\]\(\)]+', filename)
+
+        candidate_group = ""
+        if len(dot_delimit) > 2:
+            candidate_group = dot_delimit[-2]
+        elif len(bracket_delimit) > 2:
+            candidate_group = bracket_delimit[-2]
+
+        candidates = [] if len(candidate_group) == 0 else re.split(r'[,\-\+\& ]+', candidate_group)
+
+        # Discard the match group if any candidate...
+        # ...contains any numbers, as the group is likely encoding information
+        if any(re.compile(r'\d').search(string) for string in candidates):
+            return [default_language]
+        # ...is >= 5 chars long, as the group is likely other unrelated metadata
+        if any(len(string) >= 5 for string in candidates):
+            return [default_language]
+
+        languages = list()
+        for candidate in candidates:
+            candidate = candidate.lower()
+            if candidate in ["ass", "srt"]:
+                continue
+
+            # Sometimes, languages are hidden in 4-character blocks, e.g. "JPSC";
+            # split them in half and process both halves as new candidates
+            if len(candidate) == 4:
+                for addendum in [candidate[:2], candidate[2:]]:
+                    candidates.append(addendum)
+                continue
+
+            # Sometimes, language codes carry additional info such as 'cc' or 'sdh', for example: "ja[cc]"
+            if len(dot_delimit) > 2 and any(c in candidate for c in '[]()'):
+                candidate = re.split(r'[\[\]\(\)]+', candidate)[0]
+
+            try:
+                language_squash = {
+                    "jp": "ja",
+                    "jap": "ja",
+                    "chs": "zho",
+                    "cht": "zho",
+                    "zhi": "zho",
+                    "cn": "zho"
+                }
+
+                candidate = language_squash.get(candidate, candidate)
+                if len(candidate) > 2:
+                    language = Language(candidate)
+                else:
+                    language = Language.fromietf(candidate)
+
+                if not any(l.alpha3 == language.alpha3 for l in languages):
+                    languages.append(language)
+            except ValueError:
+                if candidate in FULL_LANGUAGE_LIST:
+                    # Create a dummy for the unknown language
+                    languages.append(Language("zul"))
+
+        if len(languages) > 1:
+            # Sometimes a metadata group that actually contains codec info gets processed as a set of valid languages.
+            # To prevent false positives, we'll check whether Japanese language codes are in the processed list.
+            # If not, then it's likely that we didn't actually match language codes -> assume a Japanese-only subtitle.
+            contains_jpn = any(l.alpha3 == "jpn" for l in languages)
+
+            return languages if contains_jpn else [Language("jpn")]
+        else:
+            return [default_language]
+
+    def _assemble_jimaku_search_url(self, video, media_name, additional_params={}):
+        endpoint = "entries/search"
+        anilist_id = video.anilist_id
+
+        params = {}
+        if anilist_id:
+            params = {'anilist_id': anilist_id}
+        else:
+            if self.enable_name_search_fallback or isinstance(video, Movie):
+                params = {'query': media_name}
+            else:
+                logger.error(f"Skipping '{media_name}': Got no AniList ID and fuzzy matching using the name is disabled")
+                return None
+
+        if additional_params:
+            params.update(additional_params)
+
+        logger.info(f"Will search for entry based on params: {params}")
+        return urljoin(endpoint, '?' + urlencode(params))
\ No newline at end of file
diff --git a/custom_libs/subliminal_patch/video.py b/custom_libs/subliminal_patch/video.py
index f5df0c92e..96101cf54 100644
--- a/custom_libs/subliminal_patch/video.py
+++ b/custom_libs/subliminal_patch/video.py
@@ -35,6 +35,8 @@ class Video(Video_):
         info_url=None,
         series_anidb_id=None,
         series_anidb_episode_id=None,
+        series_anidb_season_episode_offset=None,
+        anilist_id=None,
         **kwargs
     ):
         super(Video, self).__init__(
@@ -61,3 +63,5 @@ class Video(Video_):
         self.info_url = info_url
         self.series_anidb_series_id = series_anidb_id,
         self.series_anidb_episode_id = series_anidb_episode_id,
+        self.series_anidb_season_episode_offset = series_anidb_season_episode_offset
+        self.anilist_id = anilist_id
diff --git a/frontend/src/pages/Settings/Providers/list.ts b/frontend/src/pages/Settings/Providers/list.ts
index b2f9a33c7..8d7a86a99 100644
--- a/frontend/src/pages/Settings/Providers/list.ts
+++ b/frontend/src/pages/Settings/Providers/list.ts
@@ -218,6 +218,35 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
       },
     ],
   },
+  {
+    key: "jimaku",
+    name: "Jimaku.cc",
+    description: "Japanese Subtitles Provider",
+    message:
+      "API key required. Subtitles stem from various sources and might have quality/timing issues.",
+    inputs: [
+      {
+        type: "password",
+        key: "api_key",
+        name: "API key",
+      },
+      {
+        type: "switch",
+        key: "enable_name_search_fallback",
+        name: "Search by name if no AniList ID was determined (less accurate; required for live action)",
+      },
+      {
+        type: "switch",
+        key: "enable_archives_download",
+        name: "Also consider archives alongside uncompressed subtitles",
+      },
+      {
+        type: "switch",
+        key: "enable_ai_subs",
+        name: "Download AI-generated subtitles",
+      },
+    ],
+  },
   { key: "hosszupuska", description: "Hungarian Subtitles Provider" },
   { key: "karagarga",