# coding=utf-8 from __future__ import absolute_import import codecs import json import re import os import logging import datetime import socket import traceback import time import operator import unicodedata import itertools from six.moves.http_client import ResponseNotReady import rarfile import requests from collections import defaultdict from bs4 import UnicodeDammit from babelfish import LanguageReverseError from guessit.jsonutils import GuessitEncoder from subliminal import ProviderError, refiner_manager from concurrent.futures import as_completed from .extensions import provider_registry from .exceptions import MustGetBlacklisted from .score import compute_score as default_compute_score from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded from subliminal.utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb from subliminal.video import VIDEO_EXTENSIONS, Video, Episode, Movie from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \ ThreadPoolExecutor, check_video from subliminal_patch.exceptions import TooManyRequests, APIThrottled from subzero.language import Language, ENDSWITH_LANGUAGECODE_RE, FULL_LANGUAGE_LIST try: from os import scandir _scandir_generic = scandir except ImportError: from scandir import scandir, scandir_generic as _scandir_generic import six logger = logging.getLogger(__name__) # may be absolute or relative paths; set to selected options CUSTOM_PATHS = [] INCLUDE_EXOTIC_SUBS = True DOWNLOAD_TRIES = 0 DOWNLOAD_RETRY_SLEEP = 6 # fixme: this may be overkill REMOVE_CRAP_FROM_FILENAME = re.compile(r"(?i)(?:([\s_-]+(?:obfuscated|scrambled|nzbgeek|chamele0n|buymore|xpost|postbot" r"|asrequested)(?:\[.+\])?)|([\s_-]\w{2,})(\[.+\]))(?=\.\w+$|$)") SUBTITLE_EXTENSIONS = ('.srt', '.sub', '.smi', '.txt', '.ssa', '.ass', '.mpl', '.vtt') _POOL_LIFETIME = datetime.timedelta(hours=12) def remove_crap_from_fn(fn): # in case of the second regex part, the legit release group name will be in group(2), if it's followed by [string] # otherwise replace fully, because the first part matched def repl(m): return m.group(2) if len(m.groups()) == 3 else "" return REMOVE_CRAP_FROM_FILENAME.sub(repl, fn) def _nested_update(item, to_update): for k, v in to_update.items(): if isinstance(v, dict): item[k] = _nested_update(item.get(k, {}), v) else: item[k] = v return item class _ProviderConfigs(dict): def __init__(self, pool, *args, **kwargs): super().__init__(*args, **kwargs) self._pool = pool def update(self, items): updated = set() # Restart providers with new configs for key, val in items.items(): # Don't restart providers that are not enabled if key not in self._pool.providers: continue # key: provider's name; val: config dict registered_val = self.get(key) if registered_val is None or registered_val == val: continue updated.add(key) # The new dict might be a partial dict registered_val.update(val) logger.debug("Config changed. Restarting provider: %s", key) try: provider = provider_registry[key](**registered_val) # type: ignore provider.initialize() except Exception as error: self._pool.throttle_callback(key, error) else: self._pool.initialized_providers[key] = provider if updated: logger.debug("Providers with config updates: %s", updated) else: logger.debug("No provider config updates") _nested_update(self, items) return None class _Banlist: def __init__(self, must_not_contain, must_contain): self.must_not_contain = must_not_contain self.must_contain = must_contain def is_valid(self, subtitle): if subtitle.release_info is None: return True if any([x for x in self.must_not_contain if re.search(x, subtitle.release_info, flags=re.IGNORECASE) is not None]): logger.info("Skipping subtitle because release name contains prohibited string: %s", subtitle) return False if any([x for x in self.must_contain if re.search(x, subtitle.release_info, flags=re.IGNORECASE) is None]): logger.info("Skipping subtitle because release name does not contains required string: %s", subtitle) return False return True class _Blacklist(list): def is_valid(self, provider, subtitle): blacklisted = (str(provider), str(subtitle.id)) in self if blacklisted: logger.debug("Blacklisted subtitle: %s", subtitle) return not blacklisted class SZProviderPool(ProviderPool): def __init__(self, providers=None, provider_configs=None, blacklist=None, ban_list=None, throttle_callback=None, pre_download_hook=None, post_download_hook=None, language_hook=None): #: Name of providers to use self.providers = set(providers or []) #: Initialized providers self.initialized_providers = {} #: Discarded providers self.discarded_providers = set() self.blacklist = _Blacklist(blacklist or []) #: Should be a dict of 2 lists of strings self.ban_list = _Banlist(**(ban_list or {'must_contain': [], 'must_not_contain': []})) self.throttle_callback = throttle_callback self.pre_download_hook = pre_download_hook self.post_download_hook = post_download_hook self.language_hook = language_hook self._born = time.time() if not self.throttle_callback: self.throttle_callback = lambda x, y: x #: Provider configuration self.provider_configs = _ProviderConfigs(self) self.provider_configs.update(provider_configs or {}) def update(self, providers, provider_configs, blacklist, ban_list): # Check if the pool was initialized enough hours ago self._check_lifetime() providers = set(providers or []) # Check if any new provider has been added updated = providers != self.providers or ban_list != self.ban_list removed_providers = set(sorted(self.providers - providers)) logger.debug("Discarded providers: %s | New providers: %s", self.discarded_providers, providers) self.discarded_providers.difference_update(providers) logger.debug("Updated discarded providers: %s", self.discarded_providers) removed_providers.update(self.discarded_providers) logger.debug("Removed providers: %s", removed_providers) self.providers.difference_update(removed_providers) self.providers.update(list(providers)) # Terminate and delete removed providers from instance for removed in removed_providers: logger.debug("Removing provider: %s", removed) try: del self[removed] # If the user has updated the providers but hasn't made any # subtitle searches yet, the removed provider won't be in the # self dictionary except KeyError: pass # self.provider_configs = provider_configs self.provider_configs.update(provider_configs) self.blacklist = _Blacklist(blacklist or []) self.ban_list = _Banlist(**ban_list or {'must_contain': [], 'must_not_contain': []}) return updated def _check_lifetime(self): # This method is used to avoid possible memory leaks if abs(self._born - time.time()) > _POOL_LIFETIME.seconds: logger.info("%s elapsed. Terminating providers", _POOL_LIFETIME) self._born = time.time() self.terminate() def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.terminate() def __getitem__(self, name): if name not in self.providers: raise KeyError if name not in self.initialized_providers: logger.info('Initializing provider %s', name) provider = provider_registry[name](**self.provider_configs.get(name, {})) provider.initialize() self.initialized_providers[name] = provider return self.initialized_providers[name] def __delitem__(self, name): if name not in self.initialized_providers: raise KeyError(name) try: logger.info('Terminating provider %s', name) self.initialized_providers[name].terminate() except (requests.Timeout, socket.timeout) as e: logger.error('Provider %r timed out, improperly terminated', name) self.throttle_callback(name, e) except Exception as e: logger.exception('Provider %r terminated unexpectedly', name) self.throttle_callback(name, e) del self.initialized_providers[name] def list_subtitles_provider(self, provider, video, languages): """List subtitles with a single provider. The video and languages are checked against the provider. patch: add traceback info :param str provider: name of the provider. :param video: video to list subtitles for. :type video: :class:`~subliminal.video.Video` :param languages: languages to search for. :type languages: set of :class:`~babelfish.language.Language` :return: found subtitles. :rtype: list of :class:`~subliminal.subtitle.Subtitle` or None """ if self.language_hook: languages_search_base = self.language_hook(provider) else: languages_search_base = languages # check video validity if not provider_registry[provider].check(video): logger.info('Skipping provider %r: not a valid video', provider) return [] # check whether we want to search this provider for the languages use_languages = languages_search_base & languages if not use_languages: logger.info('Skipping provider %r: no language to search for (advanced: %r, requested: %r)', provider, languages_search_base, languages) return [] # check supported languages provider_languages = provider_registry[provider].languages & use_languages if not provider_languages: logger.info('Skipping provider %r: no language to search for', provider) return [] # list subtitles logger.info('Listing subtitles with provider %r and languages %r', provider, provider_languages) results = [] try: results = self[provider].list_subtitles(video, provider_languages) seen = [] out = [] for s in results: if not self.blacklist.is_valid(provider, s): continue if not self.ban_list.is_valid(s): continue if s.id in seen: continue s.plex_media_fps = float(video.fps) if video.fps else None out.append(s) seen.append(s.id) return out except Exception as e: logger.exception('Unexpected error in provider %r: %s', provider, traceback.format_exc()) self.throttle_callback(provider, e) def list_subtitles(self, video, languages): """List subtitles. patch: handle LanguageReverseError :param video: video to list subtitles for. :type video: :class:`~subliminal.video.Video` :param languages: languages to search for. :type languages: set of :class:`~babelfish.language.Language` :return: found subtitles. :rtype: list of :class:`~subliminal.subtitle.Subtitle` """ subtitles = [] for name in self.providers: # check discarded providers if name in self.discarded_providers: logger.debug('Skipping discarded provider %r', name) continue # list subtitles try: provider_subtitles = self.list_subtitles_provider(name, video, languages) except LanguageReverseError: logger.exception("Unexpected language reverse error in %s, skipping. Error: %s", name, traceback.format_exc()) continue if provider_subtitles is None: logger.info('Discarding provider %s', name) self.discarded_providers.add(name) continue # add the subtitles subtitles.extend(provider_subtitles) return subtitles def download_subtitle(self, subtitle): """Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`. patch: add retry functionality :param subtitle: subtitle to download. :type subtitle: :class:`~subliminal.subtitle.Subtitle` :return: `True` if the subtitle has been successfully downloaded, `False` otherwise. :rtype: bool """ # check discarded providers if subtitle.provider_name in self.discarded_providers: logger.warning('Provider %r is discarded', subtitle.provider_name) return False logger.info('Downloading subtitle %r', subtitle) tries = 0 # retry downloading on failure until settings' download retry limit hit while True: tries += 1 try: if self.pre_download_hook: self.pre_download_hook(subtitle) self[subtitle.provider_name].download_subtitle(subtitle) if self.post_download_hook: self.post_download_hook(subtitle) break except (requests.ConnectionError, requests.exceptions.ProxyError, requests.exceptions.SSLError, requests.Timeout, socket.timeout) as e: logger.error('Provider %r connection error', subtitle.provider_name) self.throttle_callback(subtitle.provider_name, e) except (rarfile.BadRarFile, MustGetBlacklisted) as e: self.throttle_callback(subtitle.provider_name, e) return False except Exception as e: logger.exception('Unexpected error in provider %r, Traceback: %s', subtitle.provider_name, traceback.format_exc()) self.throttle_callback(subtitle.provider_name, e) self.discarded_providers.add(subtitle.provider_name) return False if tries == DOWNLOAD_TRIES: self.discarded_providers.add(subtitle.provider_name) logger.error('Maximum retries reached for provider %r, discarding it', subtitle.provider_name) return False # don't hammer the provider logger.debug('Errors while downloading subtitle, retrying provider %r in %s seconds', subtitle.provider_name, DOWNLOAD_RETRY_SLEEP) time.sleep(DOWNLOAD_RETRY_SLEEP) # check subtitle validity if not subtitle.is_valid(): logger.error('Invalid subtitle') return False if not os.environ.get("SZ_KEEP_ENCODING", False): subtitle.normalize() return True def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None): """Download the best matching subtitles. patch: - hearing_impaired is now string - add .score to subtitle - move all languages check further to the top (still necessary?) :param subtitles: the subtitles to use. :type subtitles: list of :class:`~subliminal.subtitle.Subtitle` :param video: video to download subtitles for. :type video: :class:`~subliminal.video.Video` :param languages: languages to download. :type languages: set of :class:`~babelfish.language.Language` :param int min_score: minimum score for a subtitle to be downloaded. :param bool hearing_impaired: hearing impaired preference. :param bool only_one: download only one subtitle, not one per language. :param compute_score: function that takes `subtitle` and `video` as positional arguments, `hearing_impaired` as keyword argument and returns the score. :return: downloaded subtitles. :rtype: list of :class:`~subliminal.subtitle.Subtitle` """ compute_score = compute_score or default_compute_score use_hearing_impaired = hearing_impaired in ("prefer", "force HI") is_episode = isinstance(video, Episode) # sort subtitles by score unsorted_subtitles = [] for s in subtitles: # get the matches if s.language not in languages: logger.debug("%r: Skipping, language not searched for", s) continue try: matches = s.get_matches(video) except AttributeError: logger.error("%r: Match computation failed: %s", s, traceback.format_exc()) continue orig_matches = matches.copy() logger.debug('%r: Found matches %r', s, matches) score, score_without_hash = compute_score(matches, s, video, use_hearing_impaired) unsorted_subtitles.append( (s, score, score_without_hash, matches, orig_matches)) # sort subtitles by score scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1, 2), reverse=True) # download best subtitles, falling back on the next on error downloaded_subtitles = [] for subtitle, score, score_without_hash, matches, orig_matches in scored_subtitles: # check score if score < min_score: logger.info('%r: Score %d is below min_score (%d)', subtitle, score, min_score) break # stop when all languages are downloaded if set(s.language for s in downloaded_subtitles) == languages: logger.debug('All languages downloaded') break # check downloaded languages if subtitle.language in set(s.language for s in downloaded_subtitles): logger.debug('%r: Skipping subtitle: already downloaded', subtitle.language) continue # bail out if hearing_impaired was wrong if subtitle.hearing_impaired_verifiable and "hearing_impaired" not in matches and \ hearing_impaired in ("force HI", "force non-HI"): logger.debug('%r: Skipping subtitle with score %d because hearing-impaired set to %s', subtitle, score, hearing_impaired) continue if is_episode: can_verify_series = True if not subtitle.hash_verifiable and "hash" in matches: can_verify_series = False matches_series = False if {"season", "episode"}.issubset(orig_matches) and \ ("series" in orig_matches or "imdb_id" in orig_matches): matches_series = True if can_verify_series and not matches_series: logger.debug("%r: Skipping subtitle with score %d, because it doesn't match our series/episode", subtitle, score) continue # download logger.debug("%r: Trying to download subtitle with matches %s, score: %s; release(s): %s", subtitle, matches, score, subtitle.release_info) if self.download_subtitle(subtitle): subtitle.score = score downloaded_subtitles.append(subtitle) # stop if only one subtitle is requested if only_one: logger.debug('Only one subtitle downloaded') break return downloaded_subtitles def list_supported_languages(self): """List supported languages. :return: languages supported by the providers. :rtype: list of dicts """ languages = [] for name in self.providers: # list supported languages for a single provider try: provider_languages = self[name].languages except AttributeError: logger.exception(f"{name} provider doesn't have a languages attribute") continue if provider_languages is None: logger.info(f"Skipping provider {name} because it doesn't support any languages.") continue # add the languages for this provider languages.append({'provider': name, 'languages': provider_languages}) return languages def list_supported_video_types(self): """List supported video types. :return: video types supported by the providers. :rtype: tuple of video types """ video_types = [] for name in self.providers: # list supported video types for a single provider try: provider_video_type = self[name].video_types except AttributeError: logger.exception(f"{name} provider doesn't have a video_types method") continue if provider_video_type is None: logger.info(f"Skipping provider {name} because it doesn't support any video type.") continue # add the video types for this provider video_types.append({'provider': name, 'video_types': provider_video_type}) return video_types def __repr__(self): return ( f"{self.__class__.__name__} [{len(self.providers)} providers ({len(self.initialized_providers)} " f"initialized; {len(self.discarded_providers)} discarded)]" ) class SZAsyncProviderPool(SZProviderPool): """Subclass of :class:`ProviderPool` with asynchronous support for :meth:`~ProviderPool.list_subtitles`. :param int max_workers: maximum number of threads to use. If `None`, :attr:`max_workers` will be set to the number of :attr:`~ProviderPool.providers`. """ def __init__(self, max_workers=None, *args, **kwargs): super(SZAsyncProviderPool, self).__init__(*args, **kwargs) #: Maximum number of threads to use self._max_workers_set = max_workers is not None self.max_workers = (max_workers or len(self.providers)) or 1 logger.info("Using %d threads for %d providers (%s)", self.max_workers, len(self.providers), self.providers) def update(self, *args, **kwargs): updated = super().update(*args, **kwargs) if (len(self.providers) and not self._max_workers_set) and len(self.providers) != self.max_workers: logger.debug("This pool will use %d threads from now on", len(self.providers)) self.max_workers = len(self.providers) return updated def list_subtitles_provider(self, provider, video, languages): # list subtitles provider_subtitles = None try: provider_subtitles = super(SZAsyncProviderPool, self).list_subtitles_provider(provider, video, languages) except LanguageReverseError: logger.exception("Unexpected language reverse error in %s, skipping. Error: %s", provider, traceback.format_exc()) return provider, provider_subtitles def list_subtitles(self, video, languages, blacklist=None, ban_list=None): if is_windows_special_path: return super(SZAsyncProviderPool, self).list_subtitles(video, languages) subtitles = [] with ThreadPoolExecutor(self.max_workers) as executor: for provider, provider_subtitles in executor.map(self.list_subtitles_provider, self.providers, itertools.repeat(video, len(self.providers)), itertools.repeat(languages, len(self.providers))): # discard provider that failed if provider_subtitles is None: logger.info('Discarding provider %s', provider) self.discarded_providers.add(provider) continue # add subtitles subtitles.extend(provider_subtitles) return subtitles def list_supported_languages(self): """List supported languages asynchronously. :return: languages supported by the providers. :rtype: list of dicts """ languages = [] def get_providers_languages(provider_name): provider_languages = None try: provider_languages = {'provider': provider_name, 'languages': self[provider_name].languages} except AttributeError: logger.exception(f"{provider_name} provider doesn't have a languages attribute") return provider_languages with ThreadPoolExecutor(self.max_workers) as executor: for future in as_completed([executor.submit(get_providers_languages, x) for x in self.providers]): provider_languages = future.result() if provider_languages is None: continue # add the languages for this provider languages.append(provider_languages) return languages def list_supported_video_types(self): """List supported video types asynchronously. :return: video types supported by the providers. :rtype: tuple of video types """ video_types = [] def get_providers_video_types(provider_name): provider_video_types = None try: provider_video_types = {'provider': provider_name, 'video_types': self[provider_name].video_types} except AttributeError: logger.exception(f"{provider_name} provider doesn't have a video_types attribute") return provider_video_types with ThreadPoolExecutor(self.max_workers) as executor: for future in as_completed([executor.submit(get_providers_video_types, x) for x in self.providers]): provider_video_types = future.result() if provider_video_types is None: continue # add the languages for this provider video_types.append(provider_video_types) return video_types if is_windows_special_path: SZAsyncProviderPool = SZProviderPool def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, skip_hashing=False, hash_from=None): """Scan a video from a `path`. patch: - allow passing of hints/options to guessit - allow dry-run with dont_use_actual_file - add crap removal (obfuscated/scrambled) - trust plex's movie name :param str path: existing path to the video. :return: the scanned video. :rtype: :class:`~subliminal.video.Video` """ hints = hints or {} # check for non-existing path if not dont_use_actual_file and not os.path.exists(path): raise ValueError('Path does not exist') # check video extension if not path.lower().endswith(VIDEO_EXTENSIONS): raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1]) dirpath, filename = os.path.split(path) logger.info('Determining basic video properties for %r in %r', filename, dirpath) hints["single_value"] = True # if "title" in hints: # hints["expected_title"] = [hints["title"]] guessed_result = guessit(path, options=hints) logger.debug('GuessIt found: %s', json.dumps(guessed_result, cls=GuessitEncoder, indent=4, ensure_ascii=False)) video = Video.fromguess(path, guessed_result) video.hints = hints # ? if dont_use_actual_file and not hash_from: return video # if all providers are throttled, skip hashing if not providers: skip_hashing = True # size and hashes if not skip_hashing: hash_path = hash_from or path video.size = os.path.getsize(hash_path) if video.size > 10485760: logger.debug('Size is %d', video.size) osub_hash = None if "bsplayer" in providers: video.hashes['bsplayer'] = osub_hash = hash_opensubtitles(hash_path) if "opensubtitles" in providers: video.hashes['opensubtitles'] = osub_hash = osub_hash or hash_opensubtitles(hash_path) if "opensubtitlescom" in providers: video.hashes['opensubtitlescom'] = osub_hash = osub_hash or hash_opensubtitles(hash_path) if "shooter" in providers: video.hashes['shooter'] = hash_shooter(hash_path) if "thesubdb" in providers: video.hashes['thesubdb'] = hash_thesubdb(hash_path) if "napiprojekt" in providers: try: video.hashes['napiprojekt'] = hash_napiprojekt(hash_path) except MemoryError: logger.warning(u"Couldn't compute napiprojekt hash for %s", hash_path) if "napisy24" in providers: # Napisy24 uses the same hash as opensubtitles video.hashes['napisy24'] = osub_hash or hash_opensubtitles(hash_path) logger.debug('Computed hashes %r', video.hashes) else: logger.warning('Size is lower than 10MB: hashes not computed') return video def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"): dirpath, filename = os.path.split(path) dirpath = dirpath or '.' fn_no_ext, fileext = os.path.splitext(filename) fn_no_ext_lower = fn_no_ext.lower() subtitles = {} _scandir = _scandir_generic if scandir_generic else scandir for entry in _scandir(dirpath): if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic: logger.debug('Could not determine the name of the file, retrying with scandir_generic') return _search_external_subtitles(path, languages, only_one, True) if not entry.is_file(follow_symlinks=False): continue p = unicodedata.normalize('NFC', entry.name) # keep only valid subtitle filenames if not p.lower().endswith(SUBTITLE_EXTENSIONS): continue # not p.lower().startswith(fileroot.lower()) or not p_root, p_ext = os.path.splitext(p) if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"): continue if p_root.lower() == fn_no_ext_lower: # skip check for language code if the subtitle file name is the same as the video name subtitles[p] = None continue # extract potential forced/normal/default/hi tag # fixme: duplicate from subtitlehelpers split_tag = p_root.rsplit('.', 1) adv_tag = None if len(split_tag) > 1: adv_tag = split_tag[1].lower() if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc', 'sdh']: p_root = split_tag[0] forced = False if adv_tag: forced = "forced" in adv_tag hi = False if adv_tag: hi_tag = ["hi", "cc", "sdh"] hi = any(i for i in hi_tag if i in adv_tag) #add simplified/traditional chinese detection simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"] traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"] p_root = p_root.replace('zh-TW', 'zht') # remove possible language code for matching p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub( lambda m: "" if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0), p_root) p_root_lower = p_root_bare.lower() filename_matches = p_root_lower == fn_no_ext_lower filename_contains = p_root_lower in fn_no_ext_lower if not filename_matches: if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains): continue language = None # extract the potential language code try: language_code = p_root.rsplit(".", 1)[1].replace('_', '-') try: language = Language.fromietf(language_code) language.forced = forced language.hi = hi except (ValueError, LanguageReverseError): #add simplified/traditional chinese detection if any(ext in str(language_code) for ext in simplified_chinese): language = Language.fromietf('zh') language.forced = forced language.hi = hi elif any(ext in str(language_code) for ext in traditional_chinese): language = Language.fromietf('zh') language.forced = forced language.hi = hi else: logger.error('Cannot parse language code %r', language_code) language_code = None except IndexError: language_code = None if not language and not language_code and only_one: language = Language.rebuild(list(languages)[0], forced=forced, hi=hi) subtitles[p] = language logger.debug('Found subtitles %r', subtitles) return subtitles def search_external_subtitles(path, languages=None, only_one=False, match_strictness="strict"): """ wrap original search_external_subtitles function to search multiple paths for one given video # todo: cleanup and merge with _search_external_subtitles """ video_path, video_filename = os.path.split(path) subtitles = {} for folder_or_subfolder in [video_path] + CUSTOM_PATHS: # folder_or_subfolder may be a relative path or an absolute one try: abspath = six.text_type(os.path.abspath( os.path.join(*[video_path if not os.path.isabs(folder_or_subfolder) else "", folder_or_subfolder, video_filename]))) except Exception as e: logger.error("skipping path %s because of %s", repr(folder_or_subfolder), e) continue logger.debug("external subs: scanning path %s", abspath) if os.path.isdir(os.path.dirname(abspath)): try: subtitles.update(_search_external_subtitles(abspath, languages=languages, only_one=only_one, match_strictness=match_strictness)) except OSError: subtitles.update(_search_external_subtitles(abspath, languages=languages, only_one=only_one, match_strictness=match_strictness, scandir_generic=True)) logger.debug("external subs: found %s", subtitles) return subtitles def list_all_subtitles(videos, languages, **kwargs): """List all available subtitles. patch: remove video check, it has been done before The `videos` must pass the `languages` check of :func:`check_video`. All other parameters are passed onwards to the :class:`ProviderPool` constructor. :param videos: videos to list subtitles for. :type videos: set of :class:`~subliminal.video.Video` :param languages: languages to search for. :type languages: set of :class:`~babelfish.language.Language` :return: found subtitles per video. :rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle` """ listed_subtitles = defaultdict(list) # return immediatly if no video passed the checks if not videos: return listed_subtitles # list subtitles with SZProviderPool(**kwargs) as pool: for video in videos: logger.info('Listing subtitles for %r', video) subtitles = pool.list_subtitles(video, languages - video.subtitle_languages) listed_subtitles[video].extend(subtitles) logger.info('Found %d subtitle(s)', len(subtitles)) return listed_subtitles def list_supported_languages(pool_class, **kwargs): with pool_class(**kwargs) as pool: return pool.list_supported_languages() def list_supported_video_types(pool_class, **kwargs): with pool_class(**kwargs) as pool: return pool.list_supported_video_types() def download_subtitles(subtitles, pool_class=ProviderPool, **kwargs): """Download :attr:`~subliminal.subtitle.Subtitle.content` of `subtitles`. :param subtitles: subtitles to download. :type subtitles: list of :class:`~subliminal.subtitle.Subtitle` :param pool_class: class to use as provider pool. :type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar :param \*\*kwargs: additional parameters for the provided `pool_class` constructor. """ with pool_class(**kwargs) as pool: for subtitle in subtitles: logger.info('Downloading subtitle %r with score %s', subtitle, subtitle.score) pool.download_subtitle(subtitle) def download_best_subtitles(videos, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None, pool_class=ProviderPool, throttle_time=0, **kwargs): """List and download the best matching subtitles. The `videos` must pass the `languages` and `undefined` (`only_one`) checks of :func:`check_video`. :param videos: videos to download subtitles for. :type videos: set of :class:`~subliminal.video.Video` :param languages: languages to download. :type languages: set of :class:`~babelfish.language.Language` :param int min_score: minimum score for a subtitle to be downloaded. :param bool hearing_impaired: hearing impaired preference. :param bool only_one: download only one subtitle, not one per language. :param compute_score: function that takes `subtitle` and `video` as positional arguments, `hearing_impaired` as keyword argument and returns the score. :param pool_class: class to use as provider pool. :type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar :param \*\*kwargs: additional parameters for the provided `pool_class` constructor. :return: downloaded subtitles per video. :rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle` """ downloaded_subtitles = defaultdict(list) # check videos checked_videos = [] for video in videos: if not check_video(video, languages=languages, undefined=only_one): logger.info('Skipping video %r', video) continue checked_videos.append(video) # return immediately if no video passed the checks if not checked_videos: return downloaded_subtitles got_multiple = len(checked_videos) > 1 # download best subtitles with pool_class(**kwargs) as pool: for video in checked_videos: logger.info('Downloading best subtitles for %r', video) subtitles = pool.download_best_subtitles(pool.list_subtitles(video, languages - video.subtitle_languages), video, languages, min_score=min_score, hearing_impaired=hearing_impaired, only_one=only_one, compute_score=compute_score) logger.info('Downloaded %d subtitle(s)', len(subtitles)) downloaded_subtitles[video].extend(subtitles) if got_multiple and throttle_time: logger.debug("Waiting %ss before continuing ...", throttle_time) time.sleep(throttle_time) return downloaded_subtitles def get_subtitle_path(video_path, language=None, extension='.srt', forced_tag=False, hi_tag=False, tags=None): """Get the subtitle path using the `video_path` and `language`. :param str video_path: path to the video. :param language: language of the subtitle to put in the path. :type language: :class:`~babelfish.language.Language` :param str extension: extension of the subtitle. :param bool forced_tag: is the subtitles forced/foreign? :param bool hi_tag: is the subtitles hearing-impaired? :param list tags: list of custom tags :return: path of the subtitle. :rtype: str """ subtitle_root = os.path.splitext(video_path)[0] tags = tags or [] hi_extension = os.environ.get("SZ_HI_EXTENSION", "hi") if forced_tag: tags.append("forced") elif hi_tag: tags.append(hi_extension) if language: subtitle_root += '.' + str(language.basename) if tags: subtitle_root += ".%s" % "-".join(tags) return subtitle_root + extension def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=None, formats=("srt",), tags=None, path_decoder=None, debug_mods=False): """Save subtitles on filesystem. Subtitles are saved in the order of the list. If a subtitle with a language has already been saved, other subtitles with the same language are silently ignored. The extension used is `.lang.srt` by default or `.srt` is `single` is `True`, with `lang` being the IETF code for the :attr:`~subliminal.subtitle.Subtitle.language` of the subtitle. :param file_path: video file path :param formats: list of "srt" and "vtt" :param subtitles: subtitles to save. :type subtitles: list of :class:`~subliminal.subtitle.Subtitle` :param bool single: save a single subtitle, default is to save one subtitle per language. :param str directory: path to directory where to save the subtitles, default is next to the video. :return: the saved subtitles :rtype: list of :class:`~subliminal.subtitle.Subtitle` patch: unicode path problems """ logger.debug("Subtitle formats requested: %r", formats) saved_subtitles = [] for subtitle in subtitles: # check if HI mods will be used to get the proper name for the subtitles file must_remove_hi = 'remove_HI' in subtitle.mods # check content if subtitle.content is None: logger.error('Skipping subtitle %r: no content', subtitle) continue # check language if subtitle.language in set(s.language for s in saved_subtitles): logger.debug('Skipping subtitle %r: language already saved', subtitle) continue # create subtitle path subtitle_path = get_subtitle_path(file_path, None if single else subtitle.language, forced_tag=subtitle.language.forced, hi_tag=False if must_remove_hi else subtitle.language.hi, tags=tags) if directory is not None: subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1]) if path_decoder: subtitle_path = path_decoder(subtitle_path) # force unicode subtitle_path = UnicodeDammit(subtitle_path).unicode_markup subtitle.storage_path = subtitle_path for format in formats: if format != "srt": subtitle_path = os.path.splitext(subtitle_path)[0] + (u".%s" % format) logger.debug(u"Saving %r to %r", subtitle, subtitle_path) content = subtitle.get_modified_content(format=format, debug=debug_mods) if content: if os.path.exists(subtitle_path): os.remove(subtitle_path) with open(subtitle_path, 'wb') as f: f.write(content) subtitle.storage_path = subtitle_path else: logger.error(u"Something went wrong when getting modified subtitle for %s", subtitle) # change chmod if requested if chmod: os.chmod(subtitle_path, chmod) saved_subtitles.append(subtitle) # check single if single: break return saved_subtitles def refine(video, episode_refiners=None, movie_refiners=None, **kwargs): """Refine a video using :ref:`refiners`. patch: add traceback logging .. note:: Exceptions raised in refiners are silently passed and logged. :param video: the video to refine. :type video: :class:`~subliminal.video.Video` :param tuple episode_refiners: refiners to use for episodes. :param tuple movie_refiners: refiners to use for movies. :param \*\*kwargs: additional parameters for the :func:`~subliminal.refiners.refine` functions. """ refiners = () if isinstance(video, Episode): refiners = episode_refiners or ('metadata', 'tvdb', 'omdb') elif isinstance(video, Movie): refiners = movie_refiners or ('metadata', 'omdb') for refiner in refiners: logger.info('Refining video with %s', refiner) try: refiner_manager[refiner].plugin(video, **kwargs) except: logger.error('Failed to refine video: %s', traceback.format_exc())