bazarr/libs/subliminal_patch/providers/mixins.py

# coding=utf-8

import re
import time
import logging
import traceback
import types
import os
from httplib import ResponseNotReady

from guessit import guessit
from subliminal import ProviderError
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded
from subliminal.providers.opensubtitles import Unauthorized
from subliminal.subtitle import fix_line_ending
from subliminal_patch.exceptions import TooManyRequests

logger = logging.getLogger(__name__)


clean_whitespace_re = re.compile(r'\s+')


class PunctuationMixin(object):
    """
    provider mixin

    fixes show ids for stuff like "Mr. Petterson", as our matcher already sees it as "Mr Petterson" but addic7ed doesn't
    """

    def clean_punctuation(self, s):
        return s.replace(".", "").replace(":", "").replace("'", "").replace("&", "").replace("-", "")

    def clean_whitespace(self, s):
        return clean_whitespace_re.sub("", s)

    def full_clean(self, s):
        return self.clean_whitespace(self.clean_punctuation(s))


class ProviderRetryMixin(object):
    def retry(self, f, amount=2, exc=Exception, retry_timeout=10):
        i = 0
        while i <= amount:
            try:
                return f()
            except (Unauthorized, ServiceUnavailable, TooManyRequests, DownloadLimitExceeded, ResponseNotReady):
                raise
            except exc:
                formatted_exc = traceback.format_exc()
                i += 1
                if i == amount:
                    raise

            logger.debug(u"Retrying %s, try: %i/%i, exception: %s" % (self.__class__.__name__, i, amount, formatted_exc))
            time.sleep(retry_timeout)


class ProviderSubtitleArchiveMixin(object):
    """
    handles ZipFile and RarFile archives
    needs subtitle.episode, subtitle.season, subtitle.matches, subtitle.releases and subtitle.asked_for_episode to work
    """
    def get_subtitle_from_archive(self, subtitle, archive):
        # extract subtitle's content
        subs_in_archive = []
        for name in archive.namelist():
            for ext in (".srt", ".sub", ".ssa", ".ass"):
                if name.endswith(ext):
                    subs_in_archive.append(name)

        # select the correct subtitle file
        matching_sub = None
        subs_unsure = []
        subs_fallback = []
        if len(subs_in_archive) == 1:
            matching_sub = subs_in_archive[0]
        else:
            for sub_name in subs_in_archive:
                guess = guessit(sub_name)
                sub_name_lower = sub_name.lower()

                # consider subtitle valid if:
                # - episode and season match
                # - format matches (if it was matched before)
                # - release group matches (and we asked for one and it was matched, or it was not matched)
                # - not asked for forced and "forced" not in filename
                is_episode = subtitle.asked_for_episode

                if not subtitle.language.forced:
                    base, ext = os.path.splitext(sub_name_lower)
                    if base.endswith("forced") or "forced" in guess.get("release_group", ""):
                        continue

                episodes = guess.get("episode")
                if is_episode and episodes and not isinstance(episodes, list):
                    episodes = [episodes]

                if not is_episode or (
                        (
                                subtitle.episode in episodes
                                or (subtitle.is_pack and subtitle.asked_for_episode in episodes)
                        ) and guess.get("season") == subtitle.season):

                    format_matches = True
                    wanted_format_but_not_found = False

                    if "format" in subtitle.matches:
                        format_matches = False
                        if isinstance(subtitle.releases, types.ListType):
                            releases = ",".join(subtitle.releases).lower()
                        else:
                            releases = subtitle.releases.lower()

                        if "format" not in guess:
                            wanted_format_but_not_found = True

                        else:
                            formats = guess["format"]
                            if not isinstance(formats, types.ListType):
                                formats = [formats]

                            for f in formats:
                                format_matches = f.lower() in releases
                                if format_matches:
                                    break

                    release_group_matches = True
                    if subtitle.is_pack or (subtitle.asked_for_release_group and
                                            ("release_group" in subtitle.matches or
                                             "hash" in subtitle.matches)):

                        if subtitle.asked_for_release_group:
                            asked_for_rlsgrp = subtitle.asked_for_release_group.lower()

                            if asked_for_rlsgrp:
                                release_group_matches = False
                                if asked_for_rlsgrp in sub_name_lower:
                                    release_group_matches = True

                    if release_group_matches and format_matches:
                        matching_sub = sub_name
                        break

                    elif release_group_matches and wanted_format_but_not_found:
                        subs_unsure.append(sub_name)
                    else:
                        subs_fallback.append(sub_name)

        if not matching_sub and not subs_unsure and not subs_fallback:
            logger.error("None of expected subtitle found in archive")
            return

        elif subs_unsure:
            matching_sub = subs_unsure[0]

        elif subs_fallback:
            matching_sub = subs_fallback[0]

        try:
            matching_sub_unicode = matching_sub.decode("utf-8")
        except UnicodeDecodeError:
            try:
                matching_sub_unicode = matching_sub.decode("cp437")
            except UnicodeDecodeError:
                matching_sub_unicode = matching_sub.decode("utf-8", errors='replace')

        logger.info(u"Using %s from the archive", matching_sub_unicode)
        return fix_line_ending(archive.read(matching_sub))