# bazarr/custom_libs/subliminal_patch/providers/animetosho.py

# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import lzma

from guessit import guessit
from requests import Session
from subzero.language import Language


from subliminal.exceptions import ConfigurationError, ProviderError
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal.video import Episode

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

logger = logging.getLogger(__name__)

# Three-letter codes of the languages this provider advertises. The provider
# class turns each code into a Language object and adds Brazilian Portuguese
# on top of this list.
supported_languages = [
    # Arabic, English, Finnish, French
    "ara", "eng", "fin", "fra",
    # Hebrew, Italian, Japanese
    "heb", "ita", "jpn",
    # Portuguese, Polish, Spanish, Swedish
    "por", "pol", "spa", "swe",
    # Thai, Turkish
    "tha", "tur",
]


class AnimeToshoSubtitle(Subtitle):
    """Subtitle attachment served by AnimeTosho.org."""
    provider_name = 'animetosho'

    def __init__(self, language, download_link, meta, release_info):
        """Keep the download location and descriptive data of one attachment.

        :param language: language of the subtitle.
        :param download_link: URL of the attachment; it is also handed to the
            base class as ``page_link`` and used as the subtitle id.
        :param meta: mapping describing the file the attachment belongs to;
            ``meta['filename']`` is read by :meth:`get_matches`.
        :param release_info: release description stored on the subtitle.
        """
        super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
        self.download_link = download_link
        self.release_info = release_info
        self.meta = meta

    @property
    def id(self):
        """Identifier of the subtitle, which is its download link."""
        return self.download_link

    def get_matches(self, video):
        """Return the set of properties on which this subtitle matches ``video``.

        :param video: the video the subtitle is compared against.
        :return: set of matched property names.
        """
        # Whatever guessit extracts from the file name is compared with the video.
        matched = set(guess_matches(video, guessit(self.meta['filename'])))

        # These properties come straight from the API lookup: a subtitle that did
        # not satisfy them would have been dropped in list_subtitles already, so
        # they are always reported as matching.
        matched |= {'title', 'series', 'tvdb_id', 'season', 'episode'}

        return matched


class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
    """AnimeTosho.org Provider.

    Queries the AnimeTosho JSON feed for the entries attached to an AniDB
    episode id and exposes the subtitle attachments of those entries.
    """
    subtitle_class = AnimeToshoSubtitle
    languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
    video_types = Episode

    # Endpoints and request timeout (seconds) shared by the methods below.
    _FEED_API_URL = 'https://feed.animetosho.org/json'
    _STORAGE_DOWNLOAD_URL = 'https://animetosho.org/storage/attach/'
    _TIMEOUT = 10

    def __init__(self, search_threshold=None):
        """Create the provider.

        :param search_threshold: maximum number of completed feed entries that
            are inspected for one episode.
        :raises ConfigurationError: if ``search_threshold`` is missing or falsy.
        """
        self.session = None

        if not search_threshold:
            raise ConfigurationError("Search threshold must be specified!")

        self.search_threshold = search_threshold

    def initialize(self):
        """Open the HTTP session used for every request."""
        self.session = Session()

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def list_subtitles(self, video, languages):
        """List the subtitles available for ``video`` in the wanted languages.

        :param video: video to search for; its ``series_anidb_episode_id`` is
            used as the lookup key.
        :param languages: collection of wanted languages.
        :return: list of subtitles whose language is in ``languages``; empty
            when the video has no AniDB episode id.
        """
        if not video.series_anidb_episode_id:
            logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)

            return []

        return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]

    def download_subtitle(self, subtitle):
        """Download and decompress the content of ``subtitle``.

        :param subtitle: subtitle to download; its ``content`` is set in place.
        :return: the same subtitle object.
        :raises ProviderError: if the payload does not start with the xz magic
            number.
        """
        logger.info('Downloading subtitle %r', subtitle)

        r = self.session.get(subtitle.page_link, timeout=self._TIMEOUT)
        r.raise_for_status()

        # Check if the bytes content starts with the xz magic number of the xz archives
        if not self._is_xz_file(r.content):
            raise ProviderError('Unidentified archive type')

        subtitle.content = lzma.decompress(r.content)

        return subtitle

    @staticmethod
    def _is_xz_file(content):
        """Tell whether ``content`` (bytes) begins with the xz magic number."""
        return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')

    def _get_series(self, episode_id):
        """Collect every subtitle attachment of the entries found for an episode.

        :param episode_id: AniDB episode id to look up.
        :return: list of ``subtitle_class`` instances, one per subtitle
            attachment.
        """
        subtitles = []

        for entry in self._get_series_entries(episode_id):
            r = self.session.get(
                self._FEED_API_URL,
                params={
                    'show': 'torrent',
                    'id': entry['id'],
                },
                timeout=self._TIMEOUT
            )
            r.raise_for_status()

            for torrent_file in r.json()['files']:
                if 'attachments' not in torrent_file:
                    continue

                for attachment in torrent_file['attachments']:
                    if attachment['type'] != 'subtitle':
                        continue

                    info = attachment['info']
                    hex_id = format(attachment['id'], '08x')

                    lang = Language.fromalpha3b(info['lang'])

                    # Portuguese and Brazilian Portuguese share the same code, the name is the only
                    # identifier AnimeTosho provides. Some subtitles have no name; those may be
                    # false negatives, but nothing else can prove they are PT-BR, so they stay 'por'.
                    # A membership test is required here: str.find() returns -1 (truthy) when the
                    # text is absent and 0 (falsy) when it is at the very start of the name.
                    name = info.get('name') or ''
                    if lang.alpha3 == 'por' and 'brazil' in name.lower():
                        lang = Language('por', 'BR')

                    subtitle = self.subtitle_class(
                        lang,
                        self._STORAGE_DOWNLOAD_URL + '{}/{}.xz'.format(hex_id, attachment['id']),
                        meta=torrent_file,
                        release_info=entry.get('title'),
                    )

                    logger.debug('Found subtitle %r', subtitle)

                    subtitles.append(subtitle)

        return subtitles

    def _get_series_entries(self, episode_id):
        """Fetch the completed feed entries of an episode, newest first.

        :param episode_id: AniDB episode id to look up.
        :return: at most ``search_threshold`` entries, ordered by descending
            ``timestamp``.
        """
        r = self.session.get(
            self._FEED_API_URL,
            params={
                'eid': episode_id,
            },
            timeout=self._TIMEOUT
        )

        r.raise_for_status()

        # Ignore records that are not yet ready or have been abandoned by AnimeTosho.
        entries = [t for t in r.json() if t['status'] == 'complete']

        # Sort before applying the user-configured threshold so that the cutoff keeps the
        # latest entries instead of whichever ones happen to come first in the response.
        entries.sort(key=lambda t: t['timestamp'], reverse=True)

        return entries[:self.search_threshold]
Added animetosho provider 2024-04-14 12:19:13 +00:00			`# -- coding: utf-8 --`
			`from __future__ import absolute_import`

			`import logging`
			`import lzma`

			`from guessit import guessit`
			`from requests import Session`
			`from subzero.language import Language`


			`from subliminal.exceptions import ConfigurationError, ProviderError`
			`from subliminal_patch.providers import Provider`
			`from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin`
			`from subliminal_patch.subtitle import Subtitle, guess_matches`
			`from subliminal.video import Episode`

			`try:`
			`from lxml import etree`
			`except ImportError:`
			`try:`
			`import xml.etree.cElementTree as etree`
			`except ImportError:`
			`import xml.etree.ElementTree as etree`

			`logger = logging.getLogger(__name__)`

			`supported_languages = [`
Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`"ara", # Arabic`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`"eng", # English`
Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`"fin", # Finnish`
			`"fra", # French`
			`"heb", # Hebrew`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`"ita", # Italian`
Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`"jpn", # Japanese`
			`"por", # Portuguese`
			`"pol", # Polish`
			`"spa", # Spanish`
			`"swe", # Swedish`
			`"tha", # Thai`
			`"tur", # Turkish`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`]`


			`class AnimeToshoSubtitle(Subtitle):`
			`"""AnimeTosho.org Subtitle."""`
			`provider_name = 'animetosho'`

Added animetosho release info 2024-05-04 17:19:36 +00:00			`def __init__(self, language, download_link, meta, release_info):`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)`
			`self.meta = meta`
			`self.download_link = download_link`
Added animetosho release info 2024-05-04 17:19:36 +00:00			`self.release_info = release_info`
Added animetosho provider 2024-04-14 12:19:13 +00:00
			`@property`
			`def id(self):`
			`return self.download_link`

			`def get_matches(self, video):`
			`matches = set()`
			`matches \|= guess_matches(video, guessit(self.meta['filename']))`

			`# Add these data are explicit extracted from the API and they always have to match otherwise they wouldn't`
			`# arrive at this point and would stop on list_subtitles.`
			`matches.update(['title', 'series', 'tvdb_id', 'season', 'episode'])`

			`return matches`


			`class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):`
			`"""AnimeTosho.org Provider."""`
			`subtitle_class = AnimeToshoSubtitle`
			`languages = {Language('por', 'BR')} \| {Language(sl) for sl in supported_languages}`
			`video_types = Episode`

			`def __init__(self, search_threshold=None):`
			`self.session = None`

			`if not all([search_threshold]):`
			`raise ConfigurationError("Search threshold, Api Client and Version must be specified!")`

			`self.search_threshold = search_threshold`

			`def initialize(self):`
			`self.session = Session()`

			`def terminate(self):`
			`self.session.close()`

			`def list_subtitles(self, video, languages):`
			`if not video.series_anidb_episode_id:`
Fixed Animetosho provider error for tv shows * chore: Skip anime * wip 2024-04-30 10:28:41 +00:00			`logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)`

			`return []`
Added animetosho provider 2024-04-14 12:19:13 +00:00
			`return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]`

			`def download_subtitle(self, subtitle):`
			`logger.info('Downloading subtitle %r', subtitle)`

			`r = self.session.get(subtitle.page_link, timeout=10)`
			`r.raise_for_status()`

			`# Check if the bytes content starts with the xz magic number of the xz archives`
			`if not self._is_xz_file(r.content):`
			`raise ProviderError('Unidentified archive type')`

			`subtitle.content = lzma.decompress(r.content)`

			`return subtitle`

			`@staticmethod`
			`def _is_xz_file(content):`
			`return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')`

			`def _get_series(self, episode_id):`
			`storage_download_url = 'https://animetosho.org/storage/attach/'`
			`feed_api_url = 'https://feed.animetosho.org/json'`

			`subtitles = []`

			`entries = self._get_series_entries(episode_id)`

			`for entry in entries:`
			`r = self.session.get(`
			`feed_api_url,`
			`params={`
			`'show': 'torrent',`
			`'id': entry['id'],`
			`},`
			`timeout=10`
			`)`
			`r.raise_for_status()`

			`for file in r.json()['files']:`
			`if 'attachments' not in file:`
			`continue`

			`subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments']))`

			`for subtitle_file in subtitle_files:`
			`hex_id = format(subtitle_file['id'], '08x')`

Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`lang = Language.fromalpha3b(subtitle_file['info']['lang'])`

			`# For Portuguese and Portuguese Brazilian they both share the same code, the name is the only`
Fixed animetosho provider empty subtitle name. #2468 2024-04-25 00:27:04 +00:00			`# identifier AnimeTosho provides. Also, some subtitles does not have name, in this case it could`
			`# be a false negative but there is nothing we can use to guarantee it is PT-BR, we rather skip it.`
			`if lang.alpha3 == 'por' and subtitle_file['info'].get('name', '').lower().find('brazil'):`
Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`lang = Language('por', 'BR')`

Added animetosho provider 2024-04-14 12:19:13 +00:00			`subtitle = self.subtitle_class(`
Added additional languages to animetosho provider 2024-04-19 18:18:36 +00:00			`lang,`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']),`
			`meta=file,`
Added animetosho release info 2024-05-04 17:19:36 +00:00			`release_info=entry.get('title'),`
Added animetosho provider 2024-04-14 12:19:13 +00:00			`)`

			`logger.debug('Found subtitle %r', subtitle)`

			`subtitles.append(subtitle)`

			`return subtitles`

			`def _get_series_entries(self, episode_id):`
			`api_url = 'https://feed.animetosho.org/json'`

			`r = self.session.get(`
			`api_url,`
			`params={`
			`'eid': episode_id,`
			`},`
			`timeout=10`
			`)`

			`r.raise_for_status()`

			`j = r.json()`

			`# Ignore records that are not yet ready or has been abandoned by AnimeTosho.`
			`entries = list(filter(lambda t: t['status'] == 'complete', j))[:self.search_threshold]`

			`# Return the latest entries that have been added as it is used to cutoff via the user configuration threshold`
			`entries.sort(key=lambda t: t['timestamp'], reverse=True)`

			`return entries`