mirror of
https://github.com/morpheus65535/bazarr
synced 2024-12-21 23:32:31 +00:00
Added jimaku provider (#2505)
Co-authored-by: Danny <1695103+Rapptz@users.noreply.github.com>
This commit is contained in:
parent
e5edf6203c
commit
866b1d5894
9 changed files with 634 additions and 55 deletions
|
@ -300,6 +300,12 @@ validators = [
|
|||
|
||||
# analytics section
|
||||
Validator('analytics.enabled', must_exist=True, default=True, is_type_of=bool),
|
||||
|
||||
# jimaku section
|
||||
Validator('jimaku.api_key', must_exist=True, default='', is_type_of=str),
|
||||
Validator('jimaku.enable_name_search_fallback', must_exist=True, default=True, is_type_of=bool),
|
||||
Validator('jimaku.enable_archives_download', must_exist=True, default=False, is_type_of=bool),
|
||||
Validator('jimaku.enable_ai_subs', must_exist=True, default=False, is_type_of=bool),
|
||||
|
||||
# titlovi section
|
||||
Validator('titlovi.username', must_exist=True, default='', is_type_of=str, cast=str),
|
||||
|
|
|
@ -285,6 +285,12 @@ def get_providers_auth():
|
|||
'username': settings.titlovi.username,
|
||||
'password': settings.titlovi.password,
|
||||
},
|
||||
'jimaku': {
|
||||
'api_key': settings.jimaku.api_key,
|
||||
'enable_name_search_fallback': settings.jimaku.enable_name_search_fallback,
|
||||
'enable_archives_download': settings.jimaku.enable_archives_download,
|
||||
'enable_ai_subs': settings.jimaku.enable_ai_subs,
|
||||
},
|
||||
'ktuvit': {
|
||||
'email': settings.ktuvit.email,
|
||||
'hashed_password': settings.ktuvit.hashed_password,
|
||||
|
|
|
@ -4,10 +4,12 @@ from .ffprobe import refine_from_ffprobe
|
|||
from .database import refine_from_db
|
||||
from .arr_history import refine_from_arr_history
|
||||
from .anidb import refine_from_anidb
|
||||
from .anilist import refine_from_anilist
|
||||
|
||||
# Maps each refiner's name to the function implementing it.
# NOTE(review): entry order looks significant (see the AniList remark below) — presumably
# refiners are executed in insertion order; confirm against the code that consumes this dict.
registered = {
    "database": refine_from_db,
    "ffprobe": refine_from_ffprobe,
    "arr_history": refine_from_arr_history,
    "anidb": refine_from_anidb,
    "anilist": refine_from_anilist, # Must run AFTER AniDB
}
|
||||
|
|
|
@ -20,7 +20,10 @@ except ImportError:
|
|||
except ImportError:
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
refined_providers = {'animetosho'}
|
||||
refined_providers = {'animetosho', 'jimaku'}
|
||||
providers_requiring_anidb_api = {'animetosho'}
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
api_url = 'http://api.anidb.net:9001/httpapi'
|
||||
|
||||
|
@ -40,6 +43,10 @@ class AniDBClient(object):
|
|||
@property
def is_throttled(self):
    """Return the cached 'is_throttled' flag, or the falsy cache object when no cache is set."""
    cache = self.cache
    if not cache:
        # Mirror short-circuit ``and``: hand back the falsy cache value itself.
        return cache
    return cache.get('is_throttled')
|
||||
|
||||
@property
def has_api_credentials(self):
    """Tell whether an API client key is configured (neither ``None`` nor an empty string)."""
    client_key = self.api_client_key
    if client_key is None:
        return False
    return client_key != ''
|
||||
|
||||
@property
|
||||
def daily_api_request_count(self):
|
||||
|
@ -62,7 +69,9 @@ class AniDBClient(object):
|
|||
return r.content
|
||||
|
||||
@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
|
||||
def get_series_id(self, mappings, tvdb_series_season, tvdb_series_id, episode):
|
||||
def get_show_information(self, tvdb_series_id, tvdb_series_season, episode):
|
||||
mappings = etree.fromstring(self.get_series_mappings())
|
||||
|
||||
# Enrich the collection of anime with the episode offset
|
||||
animes = [
|
||||
self.AnimeInfo(anime, int(anime.attrib.get('episodeoffset', 0)))
|
||||
|
@ -71,49 +80,60 @@ class AniDBClient(object):
|
|||
)
|
||||
]
|
||||
|
||||
is_special_entry = False
|
||||
if not animes:
|
||||
return None, None
|
||||
# Some entries will store TVDB seasons in a nested mapping list, identifiable by the value 'a' as the season
|
||||
special_entries = mappings.findall(
|
||||
f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='a']"
|
||||
)
|
||||
|
||||
# Sort the anime by offset in ascending order
|
||||
animes.sort(key=lambda a: a.episode_offset)
|
||||
if not special_entries:
|
||||
return None, None, None
|
||||
|
||||
# Different from Tvdb, Anidb have different ids for the Parts of a season
|
||||
anidb_id = None
|
||||
offset = 0
|
||||
is_special_entry = True
|
||||
for special_entry in special_entries:
|
||||
mapping_list = special_entry.findall(f".//mapping[@tvdbseason='{tvdb_series_season}']")
|
||||
if len(mapping_list) > 0:
|
||||
anidb_id = int(special_entry.attrib.get('anidbid'))
|
||||
offset = int(mapping_list[0].attrib.get('offset', 0))
|
||||
|
||||
for index, anime_info in enumerate(animes):
|
||||
anime, episode_offset = anime_info
|
||||
if not is_special_entry:
|
||||
# Sort the anime by offset in ascending order
|
||||
animes.sort(key=lambda a: a.episode_offset)
|
||||
|
||||
mapping_list = anime.find('mapping-list')
|
||||
# Different from Tvdb, Anidb have different ids for the Parts of a season
|
||||
anidb_id = None
|
||||
offset = 0
|
||||
|
||||
# Handle mapping list for Specials
|
||||
if mapping_list:
|
||||
for mapping in mapping_list.findall("mapping"):
|
||||
# Mapping values are usually like ;1-1;2-1;3-1;
|
||||
for episode_ref in mapping.text.split(';'):
|
||||
if not episode_ref:
|
||||
continue
|
||||
for index, anime_info in enumerate(animes):
|
||||
anime, episode_offset = anime_info
|
||||
|
||||
mapping_list = anime.find('mapping-list')
|
||||
|
||||
anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
|
||||
if tvdb_episode == episode:
|
||||
anidb_id = int(anime.attrib.get('anidbid'))
|
||||
# Handle mapping list for Specials
|
||||
if mapping_list:
|
||||
for mapping in mapping_list.findall("mapping"):
|
||||
# Mapping values are usually like ;1-1;2-1;3-1;
|
||||
for episode_ref in mapping.text.split(';'):
|
||||
if not episode_ref:
|
||||
continue
|
||||
|
||||
return anidb_id, anidb_episode
|
||||
anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
|
||||
if tvdb_episode == episode:
|
||||
anidb_id = int(anime.attrib.get('anidbid'))
|
||||
|
||||
if episode > episode_offset:
|
||||
anidb_id = int(anime.attrib.get('anidbid'))
|
||||
offset = episode_offset
|
||||
return anidb_id, anidb_episode, 0
|
||||
|
||||
return anidb_id, episode - offset
|
||||
if episode > episode_offset:
|
||||
anidb_id = int(anime.attrib.get('anidbid'))
|
||||
offset = episode_offset
|
||||
|
||||
return anidb_id, episode - offset, offset
|
||||
|
||||
@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
|
||||
def get_series_episodes_ids(self, tvdb_series_id, season, episode):
|
||||
mappings = etree.fromstring(self.get_series_mappings())
|
||||
|
||||
series_id, episode_no = self.get_series_id(mappings, season, tvdb_series_id, episode)
|
||||
|
||||
def get_episode_ids(self, series_id, episode_no):
|
||||
if not series_id:
|
||||
return None, None
|
||||
return None
|
||||
|
||||
episodes = etree.fromstring(self.get_episodes(series_id))
|
||||
|
||||
|
@ -177,7 +197,7 @@ class AniDBClient(object):
|
|||
|
||||
def refine_from_anidb(path, video):
|
||||
if not isinstance(video, Episode) or not video.series_tvdb_id:
|
||||
logging.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
|
||||
logger.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
|
||||
|
||||
return
|
||||
|
||||
|
@ -190,27 +210,35 @@ def refine_anidb_ids(video):
|
|||
|
||||
season = video.season if video.season else 0
|
||||
|
||||
if anidb_client.is_throttled:
|
||||
logging.warning(f'API daily limit reached. Skipping refinement for {video.series}')
|
||||
|
||||
return video
|
||||
|
||||
try:
|
||||
anidb_series_id, anidb_episode_id = anidb_client.get_series_episodes_ids(
|
||||
video.series_tvdb_id,
|
||||
season, video.episode,
|
||||
)
|
||||
except TooManyRequests:
|
||||
logging.error(f'API daily limit reached while refining {video.series}')
|
||||
|
||||
anidb_client.mark_as_throttled()
|
||||
|
||||
return video
|
||||
|
||||
if not anidb_episode_id:
|
||||
logging.error(f'Could not find anime series {video.series}')
|
||||
|
||||
anidb_series_id, anidb_episode_no, anidb_season_episode_offset = anidb_client.get_show_information(
|
||||
video.series_tvdb_id,
|
||||
season,
|
||||
video.episode,
|
||||
)
|
||||
|
||||
if not anidb_series_id:
|
||||
logger.error(f'Could not find anime series {video.series}')
|
||||
return video
|
||||
|
||||
anidb_episode_id = None
|
||||
if anidb_client.has_api_credentials:
|
||||
if anidb_client.is_throttled:
|
||||
logger.warning(f'API daily limit reached. Skipping episode ID refinement for {video.series}')
|
||||
else:
|
||||
try:
|
||||
anidb_episode_id = anidb_client.get_episode_ids(
|
||||
anidb_series_id,
|
||||
anidb_episode_no
|
||||
)
|
||||
except TooManyRequests:
|
||||
logger.error(f'API daily limit reached while refining {video.series}')
|
||||
anidb_client.mark_as_throttled()
|
||||
else:
|
||||
intersect = providers_requiring_anidb_api.intersection(settings.general.enabled_providers)
|
||||
if len(intersect) >= 1:
|
||||
logger.warn(f'AniDB API credentials are not fully set up, the following providers may not work: {intersect}')
|
||||
|
||||
video.series_anidb_id = anidb_series_id
|
||||
video.series_anidb_episode_id = anidb_episode_id
|
||||
video.series_anidb_episode_no = anidb_episode_no
|
||||
video.series_anidb_season_episode_offset = anidb_season_episode_offset
|
||||
|
|
77
bazarr/subtitles/refiners/anilist.py
Normal file
77
bazarr/subtitles/refiners/anilist.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
# coding=utf-8
|
||||
# fmt: off
|
||||
|
||||
import logging
|
||||
import time
|
||||
import requests
|
||||
from collections import namedtuple
|
||||
from datetime import timedelta
|
||||
|
||||
from app.config import settings
|
||||
from subliminal import Episode, region, __short_version__
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
refined_providers = {'jimaku'}
|
||||
|
||||
class AniListClient(object):
    """Look up AniList IDs through the community-maintained anime-lists ID mapping.

    :param session: optional pre-configured :class:`requests.Session`; a new one is created when omitted.
    :param timeout: timeout in seconds applied to every HTTP request made by this client.
    """

    def __init__(self, session=None, timeout=10):
        self.session = session or requests.Session()
        # requests does not honour a ``timeout`` attribute set on a Session, so the value is
        # kept on the client and passed explicitly with each request. The session attribute
        # is still assigned for anything that may read it.
        self.timeout = timeout
        self.session.timeout = timeout
        self.session.headers['Content-Type'] = 'application/json'
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
    def get_series_mappings(self):
        """Download the ID mapping list (cached for one day).

        :return: the decoded JSON document.
        :raises requests.HTTPError: when the server answers with an error status.
        """
        r = self.session.get(
            'https://raw.githubusercontent.com/Fribb/anime-lists/master/anime-list-mini.json',
            timeout=self.timeout,
        )

        r.raise_for_status()
        return r.json()

    def get_series_id(self, candidate_id_name, candidate_id_value):
        """Return the AniList ID of the first mapping entry matching the given ID.

        :param candidate_id_name: name of the video attribute the value came from
            (``series_anidb_id`` or ``imdb_id``); unknown names are used as the mapping key as-is.
        :param candidate_id_value: value to look for; compared as a string.
        :return: the ``anilist_id`` of the first match, or ``None`` when nothing matches.
        """
        anime_list = self.get_series_mappings()

        # Video attribute name -> key used by the mapping entries
        tag_map = {
            "series_anidb_id": "anidb_id",
            "imdb_id": "imdb_id"
        }
        mapped_tag = tag_map.get(candidate_id_name, candidate_id_name)
        wanted_value = str(candidate_id_value)

        matched_entries = [
            entry for entry in anime_list
            if mapped_tag in entry and str(entry[mapped_tag]) == wanted_value
        ]
        logger.debug(f"Based on '{mapped_tag}': '{candidate_id_value}', anime-list matched: {matched_entries}")

        if matched_entries:
            return matched_entries[0]["anilist_id"]

        logger.debug(f"Could not find corresponding AniList ID with '{mapped_tag}': {candidate_id_value}")
        return None
|
||||
|
||||
def refine_from_anilist(path, video):
    """Refiner entry point: fill in ``video.anilist_id`` when an enabled provider needs it.

    Episodes without an AniDB ID are rejected with an error log. Otherwise the lookup only
    runs if one of ``refined_providers`` is enabled and the video has no AniList ID yet.

    :param path: path of the video file (unused here).
    :param video: the video to refine; modified in place.
    """
    # Safety check: episodes are resolved through their AniDB ID
    episode_without_anidb_id = isinstance(video, Episode) and not video.series_anidb_id
    if episode_without_anidb_id:
        logger.error(f"Will not refine '{video.series}' as it does not have an AniDB ID.")
        return

    interested_providers = refined_providers.intersection(settings.general.enabled_providers)
    if not interested_providers or video.anilist_id is not None:
        return

    refine_anilist_ids(video)
|
||||
|
||||
def refine_anilist_ids(video):
    """Resolve the AniList ID of ``video`` and store it in ``video.anilist_id``.

    Episodes are looked up by ``series_anidb_id``, everything else by ``imdb_id``.

    :param video: the video to refine; modified in place on success.
    :return: ``video`` when the source ID is missing or no AniList ID could be found,
        ``None`` after a successful assignment.
    """
    anilist_client = AniListClient()

    candidate_id_name = "series_anidb_id" if isinstance(video, Episode) else "imdb_id"
    candidate_id_value = getattr(video, candidate_id_name, None)

    if candidate_id_value:
        anilist_id = anilist_client.get_series_id(candidate_id_name, candidate_id_value)
        if anilist_id:
            video.anilist_id = anilist_id
            return None
        return video

    logger.error(f"Found no value for property {candidate_id_name} of video.")
    return video
|
|
@ -130,7 +130,8 @@ class Episode(Video):
|
|||
"""
|
||||
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
|
||||
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, series_anidb_id=None,
|
||||
series_anidb_episode_id=None, **kwargs):
|
||||
series_anidb_episode_id=None, series_anidb_season_episode_offset=None,
|
||||
anilist_id=None, **kwargs):
|
||||
super(Episode, self).__init__(name, **kwargs)
|
||||
|
||||
#: Series of the episode
|
||||
|
@ -163,8 +164,11 @@ class Episode(Video):
|
|||
#: Alternative names of the series
|
||||
self.alternative_series = alternative_series or []
|
||||
|
||||
#: Anime specific information
|
||||
self.series_anidb_episode_id = series_anidb_episode_id
|
||||
self.series_anidb_id = series_anidb_id
|
||||
self.series_anidb_season_episode_offset = series_anidb_season_episode_offset
|
||||
self.anilist_id = anilist_id
|
||||
|
||||
@classmethod
|
||||
def fromguess(cls, name, guess):
|
||||
|
@ -207,10 +211,11 @@ class Movie(Video):
|
|||
:param str title: title of the movie.
|
||||
:param int year: year of the movie.
|
||||
:param list alternative_titles: alternative titles of the movie
|
||||
:param int anilist_id: AniList ID of movie (if Anime)
|
||||
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
|
||||
|
||||
"""
|
||||
def __init__(self, name, title, year=None, alternative_titles=None, **kwargs):
|
||||
def __init__(self, name, title, year=None, alternative_titles=None, anilist_id=None, **kwargs):
|
||||
super(Movie, self).__init__(name, **kwargs)
|
||||
|
||||
#: Title of the movie
|
||||
|
@ -221,6 +226,9 @@ class Movie(Video):
|
|||
|
||||
#: Alternative titles of the movie
|
||||
self.alternative_titles = alternative_titles or []
|
||||
|
||||
#: AniList ID of the movie
|
||||
self.anilist_id = anilist_id
|
||||
|
||||
@classmethod
|
||||
def fromguess(cls, name, guess):
|
||||
|
|
419
custom_libs/subliminal_patch/providers/jimaku.py
Normal file
419
custom_libs/subliminal_patch/providers/jimaku.py
Normal file
|
@ -0,0 +1,419 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
from requests import Session
|
||||
from subliminal import region, __short_version__
|
||||
from subliminal.cache import REFINER_EXPIRATION_TIME
|
||||
from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable
|
||||
from subliminal.utils import sanitize
|
||||
from subliminal.video import Episode, Movie
|
||||
from subliminal_patch.providers import Provider
|
||||
from subliminal_patch.subtitle import Subtitle
|
||||
from subliminal_patch.exceptions import APIThrottled
|
||||
from subliminal_patch.providers.utils import get_subtitle_from_archive, get_archive_from_bytes
|
||||
from urllib.parse import urlencode, urljoin
|
||||
from guessit import guessit
|
||||
from subzero.language import Language, FULL_LANGUAGE_LIST
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Unhandled formats, such files will always get filtered out
|
||||
unhandled_archive_formats = (".7z",)
|
||||
accepted_archive_formats = (".zip", ".rar")
|
||||
|
||||
class JimakuSubtitle(Subtitle):
    '''Jimaku Subtitle.

    :param language: language of the subtitle.
    :param video: the video this subtitle was found for.
    :param download_url: URL the file is downloaded from; also used as page link and id.
    :param filename: name of the file on Jimaku; also used as release info.
    '''
    provider_name = 'jimaku'

    hash_verifiable = False

    def __init__(self, language, video, download_url, filename):
        super(JimakuSubtitle, self).__init__(language, page_link=download_url)

        self.video = video
        self.download_url = download_url
        self.filename = filename
        self.release_info = filename
        # Archives need to be unpacked after download
        self.is_archive = filename.endswith(accepted_archive_formats)

    @property
    def id(self):
        return self.download_url

    def get_matches(self, video):
        '''Compute the set of match names between this subtitle and ``video``.

        :param video: the video to compare against.
        :return: set of match names.
        '''
        matches = set()

        # Episode/Movie specific matches
        if isinstance(video, Episode):
            if sanitize(video.series) and sanitize(self.video.series) in (
                    sanitize(name) for name in [video.series] + video.alternative_series):
                matches.add('series')

            # An unknown season on our side is accepted as a match
            if video.season and (self.video.season is None or video.season == self.video.season):
                matches.add('season')
        elif isinstance(video, Movie):
            if sanitize(video.title) and sanitize(self.video.title) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles):
                matches.add('title')

        # General matches
        if video.year and video.year == self.video.year:
            matches.add('year')

        video_type = 'movie' if isinstance(video, Movie) else 'episode'
        matches.add(video_type)

        # Compare the video's release group with what guessit reads from the file name, either
        # as release group or as source. The guess is a mapping, so values are looked up by key;
        # a property guessed several times comes back as a list.
        guess = guessit(self.filename, {'type': video_type})
        if video.release_group:
            for key in ('release_group', 'source'):
                guessed = guess.get(key)
                guessed_values = guessed if isinstance(guessed, list) else [guessed]
                if video.release_group in guessed_values:
                    matches.add('release_group')
                    break

        # Prioritize .srt by repurposing the audio_codec match
        if self.filename.endswith(".srt"):
            matches.add('audio_codec')

        return matches
|
||||
|
||||
class JimakuProvider(Provider):
    '''Jimaku Provider.

    Finds the Jimaku entry for a video (by AniList ID, or by name as a fallback), lists the
    files uploaded for it and filters them down to usable Japanese subtitles.

    :param enable_name_search_fallback: search by name when the video has no AniList ID.
    :param enable_archives_download: also offer archives when plain subtitle files exist.
    :param enable_ai_subs: keep files whose name suggests they were AI generated.
    :param api_key: Jimaku API key (required).
    :raises ConfigurationError: when ``api_key`` is empty.
    '''
    video_types = (Episode, Movie)

    api_url = 'https://jimaku.cc/api'
    # Seconds to wait after a HTTP 429 response before trying again
    api_ratelimit_max_delay_seconds = 5
    # Number of HTTP 429 responses tolerated for a single request
    api_ratelimit_backoff_limit = 3

    # Files reported smaller than this (in bytes) are considered corrupt
    corrupted_file_size_threshold = 500

    languages = {Language.fromietf("ja")}

    # File names matching this are suspected of being AI (Whisper) generated
    _ai_subtitle_pattern = re.compile(r'[\[\(]?(whisperai)[\]\)]?|[\[\(]whisper[\]\)]', re.IGNORECASE)

    def __init__(self, enable_name_search_fallback, enable_archives_download, enable_ai_subs, api_key):
        if api_key:
            self.api_key = api_key
        else:
            raise ConfigurationError('Missing api_key.')

        self.enable_name_search_fallback = enable_name_search_fallback
        self.download_archives = enable_archives_download
        self.enable_ai_subs = enable_ai_subs
        self.session = None

    def initialize(self):
        '''Create the HTTP session carrying the API key.'''
        self.session = Session()
        self.session.headers['Content-Type'] = 'application/json'
        self.session.headers['Authorization'] = self.api_key
        self.session.headers['User-Agent'] = os.environ.get("SZ_USER_AGENT")

    def terminate(self):
        '''Close the HTTP session, if one was opened.'''
        if self.session is not None:
            self.session.close()

    def _query(self, video):
        '''Search Jimaku for subtitles matching ``video``.

        :return: list of :class:`JimakuSubtitle` (possibly empty), or ``None`` when no entry
            or no files could be found.
        '''
        if isinstance(video, Movie):
            media_name = video.title.lower()
        elif isinstance(video, Episode):
            media_name = video.series.lower()

            # With entries that have a season larger than 1, Jimaku appends the corresponding season number to the name.
            # We'll reassemble media_name here to account for cases where we can only search by name alone.
            if video.season and video.season > 1:
                media_name = f"{media_name} {video.season}"
        else:
            logger.debug(f"Unsupported video type '{type(video).__name__}', skipping")
            return None

        # Search for entry
        data = self._find_entry(video, media_name)
        if not data:
            return None

        # We only go for the first entry
        entry = data[0]

        entry_id = entry.get('id')
        anilist_id = entry.get('anilist_id', None)
        entry_name = entry.get('name')
        flags = entry.get('flags') or {}
        is_movie = flags.get('movie', False)

        if isinstance(video, Episode) and is_movie:
            logger.warning("Bazarr thinks this is a series, but Jimaku says this is a movie! May not be able to match subtitles...")

        logger.info(f"Matched entry: ID: '{entry_id}', anilist_id: '{anilist_id}', name: '{entry_name}', english_name: '{entry.get('english_name')}', movie: {is_movie}")
        if flags.get("unverified"):
            logger.warning(f"This entry '{entry_id}' is unverified, subtitles might be incomplete or have quality issues!")

        # Get a list of subtitles for entry
        data, only_look_for_archives = self._find_files(video, entry_id, is_movie)
        if not data:
            return None

        # Filter subtitles; unhandled archive formats are dropped right away
        data = [item for item in data if not item['name'].endswith(unhandled_archive_formats)]

        # Detect only archives being uploaded
        archive_entries = [item for item in data if item['name'].endswith(accepted_archive_formats)]
        subtitle_entries = [item for item in data if not item['name'].endswith(accepted_archive_formats)]
        has_only_archives = len(archive_entries) > 0 and len(subtitle_entries) == 0
        if has_only_archives:
            logger.warning("Have only found archived subtitles")
        elif only_look_for_archives:
            data = archive_entries

        list_of_subtitles = []
        for item in data:
            filename = item.get('name')
            download_url = item.get('url')
            is_archive = filename.endswith(accepted_archive_formats)

            # Archives will still be considered if they're the only files available, as is mostly the case for movies.
            if is_archive and not has_only_archives and not self.download_archives:
                logger.warning(f"Skipping archive '{filename}' because normal subtitles are available instead")
                continue

            if not self.enable_ai_subs and self._ai_subtitle_pattern.search(filename):
                logger.warning(f"Skipping subtitle '{filename}' as it's suspected of being AI generated")
                continue

            sub_languages = self._try_determine_subtitle_languages(filename)
            if len(sub_languages) > 1:
                logger.warning(f"Skipping subtitle '{filename}' as it's suspected of containing multiple languages")
                continue

            # Check if file is obviously corrupt. If no size is returned, assume OK
            filesize = item.get('size')
            if filesize is not None and filesize < self.corrupted_file_size_threshold:
                logger.warning(f"Skipping possibly corrupt file '{filename}': Filesize is just {filesize} bytes")
                continue

            lang = sub_languages[0] if sub_languages else Language("jpn")
            list_of_subtitles.append(JimakuSubtitle(lang, video, download_url, filename))

        return list_of_subtitles

    def _find_entry(self, video, media_name):
        '''Look up the Jimaku entry list for ``video``; retry a name search once as non-anime.

        :return: the non-empty search result, or ``None``.
        '''
        additional_url_params = {}
        for attempt in (1, 2):
            url = self._assemble_jimaku_search_url(video, media_name, additional_url_params)
            if not url:
                return None

            searched_by_name = "query" in url
            data = self._search_for_entry(url)
            if data:
                return data

            if not searched_by_name or attempt == 2:
                return None

            logger.info("Maybe this is live action media? Will retry search without anime parameter...")
            additional_url_params = {'anime': "false"}

        return None

    def _find_files(self, video, entry_id, is_movie):
        '''List the files of an entry, trying alternative episode parameters once if needed.

        ``video.episode`` is updated when the offset-adjusted episode number produced the result.

        :return: tuple ``(files, only_look_for_archives)``; ``files`` is ``None`` when nothing was found.
        '''
        is_episode_lookup = isinstance(video, Episode) and not is_movie
        episode_number = getattr(video, 'episode', None)
        url_params = {'episode': episode_number} if is_episode_lookup else {}
        only_look_for_archives = False

        # Account for positive episode offset first
        adjusted_ep_num = None
        offset = getattr(video, 'series_anidb_season_episode_offset', None) if is_episode_lookup else None
        if offset is not None and video.season and video.season > 1 and isinstance(episode_number, int):
            offset_value = abs(offset)
            if episode_number < offset_value:
                adjusted_ep_num = episode_number + offset_value
                logger.warning(f"Will try using adjusted episode number {adjusted_ep_num} first")
                url_params = {'episode': adjusted_ep_num}

        url = f"entries/{entry_id}/files"
        for attempt in range(2):
            data = self._search_for_subtitles(url, url_params)
            if data:
                if adjusted_ep_num:
                    video.episode = adjusted_ep_num
                    logger.debug(f"This videos episode attribute has been updated to: {video.episode}")
                return data, only_look_for_archives

            if not is_episode_lookup or attempt > 0:
                break

            if adjusted_ep_num is not None:
                logger.warning(f"Found no subtitles for adjusted episode number, but will retry with normal episode number {episode_number}")
                url_params = {'episode': episode_number}
                # The adjusted number did not match anything, so it must not be written
                # back to the video if the fallback query succeeds.
                adjusted_ep_num = None
            else:
                logger.warning(f"Found no subtitles for episode number {episode_number}, but will retry without 'episode' parameter")
                url_params = {}
                only_look_for_archives = True

        return None, only_look_for_archives

    def list_subtitles(self, video, languages=None):
        '''Return the subtitles found for ``video`` (an empty list when there are none).'''
        subtitles = self._query(video)
        if not subtitles:
            return []

        return list(subtitles)

    def download_subtitle(self, subtitle: JimakuSubtitle):
        '''Download ``subtitle`` and store its content, unpacking archives when needed.

        :raises requests.HTTPError: when the download answers with an error status.
        '''
        target_url = subtitle.download_url
        response = self.session.get(target_url, timeout=10)
        response.raise_for_status()

        if not subtitle.is_archive:
            subtitle.content = response.content
            return

        archive = get_archive_from_bytes(response.content)
        if not archive:
            logger.warning("Archive seems to not be an archive! File possibly corrupt?")
            return None

        if isinstance(subtitle.video, Episode):
            subtitle.content = get_subtitle_from_archive(
                archive,
                episode=subtitle.video.episode,
                episode_title=subtitle.video.title
            )
        else:
            subtitle.content = get_subtitle_from_archive(
                archive
            )

    def _do_jimaku_request(self, url_path, url_params=None):
        '''Perform a GET against the Jimaku API, backing off on rate limiting.

        :param url_path: path below ``api_url``; may already carry a query string.
        :param url_params: optional query parameters; they replace the query string of ``url_path``.
        :return: the decoded JSON response, or ``None`` when it is empty.
        :raises AuthenticationError: on HTTP 401.
        :raises ServiceUnavailable: when the response carries an ``error`` field.
        :raises APIThrottled: when the rate limit is hit ``api_ratelimit_backoff_limit`` times.
        '''
        # An empty '?' keeps the query string already present in url_path
        url = urljoin(f"{self.api_url}/{url_path}", '?' + urlencode(url_params or {}))

        retry_count = 0
        while retry_count < self.api_ratelimit_backoff_limit:
            response = self.session.get(url, timeout=10)

            if response.status_code == 429:
                reset_time = self.api_ratelimit_max_delay_seconds
                retry_count += 1

                logger.warning(f"Jimaku ratelimit hit, waiting for '{reset_time}' seconds ({retry_count}/{self.api_ratelimit_backoff_limit} tries)")
                time.sleep(reset_time)
                continue
            elif response.status_code == 401:
                raise AuthenticationError("Unauthorized. API key possibly invalid")
            else:
                response.raise_for_status()

            data = response.json()
            logger.debug(f"Length of response on {url}: {len(data)}")
            if len(data) == 0:
                logger.error(f"Jimaku returned no items for our our query: {url}")
                return None
            elif 'error' in data:
                raise ServiceUnavailable(f"Jimaku returned an error: '{data.get('error')}', Code: '{data.get('code')}'")
            else:
                return data

        raise APIThrottled(f"Jimaku ratelimit max backoff limit of {self.api_ratelimit_backoff_limit} reached, aborting")

    # Wrapper functions to indirectly call _do_jimaku_request with different cache configs
    @region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
    def _search_for_entry(self, url_path, url_params=None):
        return self._do_jimaku_request(url_path, url_params)

    @region.cache_on_arguments(expiration_time=timedelta(minutes=1).total_seconds())
    def _search_for_subtitles(self, url_path, url_params=None):
        return self._do_jimaku_request(url_path, url_params)

    @staticmethod
    def _try_determine_subtitle_languages(filename):
        '''Guess the languages of a subtitle from its file name.

        :return: list of languages; a single Japanese entry unless several languages
            including Japanese were recognised.
        '''
        # This is more like a guess and not a 100% fool-proof way of detecting multi-lang subs:
        # It assumes that language codes, if present, are in the last metadata group of the subs filename.
        # If such codes are not present, or we failed to match any at all, then we'll just assume that the sub is purely Japanese.
        default_language = Language("jpn")

        dot_delimit = filename.split(".")
        bracket_delimit = re.split(r'[\[\]\(\)]+', filename)

        candidate_group = ""
        if len(dot_delimit) > 2:
            candidate_group = dot_delimit[-2]
        elif len(bracket_delimit) > 2:
            candidate_group = bracket_delimit[-2]

        candidates = re.split(r'[,\-\+\& ]+', candidate_group) if candidate_group else []

        # Discard match group if any candidate...
        # ...contains any numbers, as the group is likely encoding information
        if any(re.search(r'\d', string) for string in candidates):
            return [default_language]
        # ...is >= 5 chars long, as the group is likely other unrelated metadata
        if any(len(string) >= 5 for string in candidates):
            return [default_language]

        # Sometimes, languages are hidden in 4 character blocks, i.e. "JPSC".
        # Those are split in two halves which are examined after the regular candidates.
        regular_candidates = []
        split_halves = []
        for candidate in candidates:
            candidate = candidate.lower()
            if len(candidate) == 4:
                split_halves.extend([candidate[:2], candidate[2:]])
            else:
                regular_candidates.append(candidate)

        # Non-standard codes seen in file names -> codes understood by Language
        language_squash = {
            "jp": "ja",
            "jap": "ja",
            "chs": "zho",
            "cht": "zho",
            "zhi": "zho",
            "cn": "zho"
        }

        languages = list()
        for candidate in regular_candidates + split_halves:
            if candidate in ["ass", "srt"]:
                continue

            # Sometimes, language codes can have additional info such as 'cc' or 'sdh'. For example: "ja[cc]"
            if len(dot_delimit) > 2 and any(c in candidate for c in '[]()'):
                candidate = re.split(r'[\[\]\(\)]+', candidate)[0]

            candidate = language_squash.get(candidate, candidate)
            try:
                if len(candidate) > 2:
                    language = Language(candidate)
                else:
                    language = Language.fromietf(candidate)

                if not any(l.alpha3 == language.alpha3 for l in languages):
                    languages.append(language)
            except Exception:
                # Not a code Language can parse
                if candidate in FULL_LANGUAGE_LIST:
                    # Create a dummy for the unknown language
                    languages.append(Language("zul"))

        if len(languages) > 1:
            # Sometimes a metadata group that actually contains info about codecs gets processed as valid languages.
            # To prevent false positives, we'll check if Japanese language codes are in the processed languages list.
            # If not, then it's likely that we didn't actually match language codes -> Assume Japanese only subtitle.
            contains_jpn = any(l.alpha3 == "jpn" for l in languages)

            return languages if contains_jpn else [Language("jpn")]
        else:
            return [default_language]

    def _assemble_jimaku_search_url(self, video, media_name, additional_params=None):
        '''Build the relative search URL for ``video``.

        :param additional_params: optional extra query parameters.
        :return: the URL, or ``None`` when there is no AniList ID and name search is not allowed.
        '''
        endpoint = "entries/search"
        anilist_id = video.anilist_id

        params = {}
        if anilist_id:
            params = {'anilist_id': anilist_id}
        else:
            if self.enable_name_search_fallback or isinstance(video, Movie):
                params = {'query': media_name}
            else:
                logger.error(f"Skipping '{media_name}': Got no AniList ID and fuzzy matching using name is disabled")
                return None

        if additional_params:
            params.update(additional_params)

        logger.info(f"Will search for entry based on params: {params}")
        return urljoin(endpoint, '?' + urlencode(params))
|
|
@ -35,6 +35,8 @@ class Video(Video_):
|
|||
info_url=None,
|
||||
series_anidb_id=None,
|
||||
series_anidb_episode_id=None,
|
||||
series_anidb_season_episode_offset=None,
|
||||
anilist_id=None,
|
||||
**kwargs
|
||||
):
|
||||
super(Video, self).__init__(
|
||||
|
@ -61,3 +63,5 @@ class Video(Video_):
|
|||
self.info_url = info_url
|
||||
self.series_anidb_series_id = series_anidb_id,
|
||||
self.series_anidb_episode_id = series_anidb_episode_id,
|
||||
self.series_anidb_season_episode_offset = series_anidb_season_episode_offset,
|
||||
self.anilist_id = anilist_id,
|
||||
|
|
|
@ -218,6 +218,35 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
key: "jimaku",
|
||||
name: "Jimaku.cc",
|
||||
description: "Japanese Subtitles Provider",
|
||||
message:
|
||||
"API key required. Subtitles stem from various sources and might have quality/timing issues.",
|
||||
inputs: [
|
||||
{
|
||||
type: "password",
|
||||
key: "api_key",
|
||||
name: "API key",
|
||||
},
|
||||
{
|
||||
type: "switch",
|
||||
key: "enable_name_search_fallback",
|
||||
name: "Search by name if no AniList ID was determined (Less accurate, required for live action)",
|
||||
},
|
||||
{
|
||||
type: "switch",
|
||||
key: "enable_archives_download",
|
||||
name: "Also consider archives alongside uncompressed subtitles",
|
||||
},
|
||||
{
|
||||
type: "switch",
|
||||
key: "enable_ai_subs",
|
||||
name: "Download AI generated subtitles",
|
||||
},
|
||||
],
|
||||
},
|
||||
{ key: "hosszupuska", description: "Hungarian Subtitles Provider" },
|
||||
{
|
||||
key: "karagarga",
|
||||
|
|
Loading…
Reference in a new issue