From d90d1cbfcc57ff07ad2cff136481260dbb1ef7e3 Mon Sep 17 00:00:00 2001 From: morpheus65535 Date: Sun, 28 May 2023 09:16:29 -0400 Subject: [PATCH] Fixed external subtitles indexation with accented characters that resulted in download in loop. #1961 --- libs/subliminal_patch/core.py | 79 ++++++++++++++--------------------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index c31d5ecd0..948b25eb6 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -1,6 +1,5 @@ # coding=utf-8 -from __future__ import absolute_import -import codecs +import six import json import re import os @@ -11,37 +10,27 @@ import traceback import time import operator import unicodedata - import itertools -from six.moves.http_client import ResponseNotReady - import rarfile import requests +from os import scandir from collections import defaultdict from bs4 import UnicodeDammit from babelfish import LanguageReverseError from guessit.jsonutils import GuessitEncoder -from subliminal import ProviderError, refiner_manager +from subliminal import refiner_manager from concurrent.futures import as_completed from .extensions import provider_registry from .exceptions import MustGetBlacklisted from .score import compute_score as default_compute_score -from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded from subliminal.utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb from subliminal.video import VIDEO_EXTENSIONS, Video, Episode, Movie from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \ ThreadPoolExecutor, check_video -from subliminal_patch.exceptions import TooManyRequests, APIThrottled from subzero.language import Language, ENDSWITH_LANGUAGECODE_RE, FULL_LANGUAGE_LIST -try: - from os import scandir - _scandir_generic = scandir -except ImportError: - from scandir import scandir, scandir_generic as _scandir_generic -import six logger = logging.getLogger(__name__) @@ -106,7 +95,7 @@ class _ProviderConfigs(dict): logger.debug("Config changed. Restarting provider: %s", key) try: - provider = provider_registry[key](**registered_val) # type: ignore + provider = provider_registry[key](**registered_val) # type: ignore provider.initialize() except Exception as error: self._pool.throttle_callback(key, error) @@ -269,7 +258,7 @@ class SZProviderPool(ProviderPool): """List subtitles with a single provider. The video and languages are checked against the provider. - + patch: add traceback info :param str provider: name of the provider. @@ -333,7 +322,7 @@ class SZProviderPool(ProviderPool): def list_subtitles(self, video, languages): """List subtitles. - + patch: handle LanguageReverseError :param video: video to list subtitles for. @@ -372,9 +361,9 @@ class SZProviderPool(ProviderPool): def download_subtitle(self, subtitle): """Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`. - + patch: add retry functionality - + :param subtitle: subtitle to download. :type subtitle: :class:`~subliminal.subtitle.Subtitle` :return: `True` if the subtitle has been successfully downloaded, `False` otherwise. @@ -442,8 +431,8 @@ class SZProviderPool(ProviderPool): def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None): """Download the best matching subtitles. - - patch: + + patch: - hearing_impaired is now string - add .score to subtitle - move all languages check further to the top (still necessary?) @@ -513,7 +502,7 @@ class SZProviderPool(ProviderPool): # bail out if hearing_impaired was wrong if subtitle.hearing_impaired_verifiable and "hearing_impaired" not in matches and \ - hearing_impaired in ("force HI", "force non-HI"): + hearing_impaired in ("force HI", "force non-HI"): logger.debug('%r: Skipping subtitle with score %d because hearing-impaired set to %s', subtitle, score, hearing_impaired) continue @@ -525,7 +514,7 @@ class SZProviderPool(ProviderPool): matches_series = False if {"season", "episode"}.issubset(orig_matches) and \ - ("series" in orig_matches or "imdb_id" in orig_matches): + ("series" in orig_matches or "imdb_id" in orig_matches): matches_series = True if can_verify_series and not matches_series: @@ -534,8 +523,8 @@ class SZProviderPool(ProviderPool): continue # download - logger.debug("%r: Trying to download subtitle with matches %s, score: %s; release(s): %s", subtitle, matches, - score, subtitle.release_info) + logger.debug("%r: Trying to download subtitle with matches %s, score: %s; release(s): %s", subtitle, + matches, score, subtitle.release_info) if self.download_subtitle(subtitle): subtitle.score = score downloaded_subtitles.append(subtitle) @@ -613,6 +602,7 @@ class SZAsyncProviderPool(SZProviderPool): to the number of :attr:`~ProviderPool.providers`. """ + def __init__(self, max_workers=None, *args, **kwargs): super(SZAsyncProviderPool, self).__init__(*args, **kwargs) @@ -761,7 +751,7 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski logger.debug('GuessIt found: %s', json.dumps(guessed_result, cls=GuessitEncoder, indent=4, ensure_ascii=False)) video = Video.fromguess(path, guessed_result) - video.hints = hints # ? + video.hints = hints # ? if dont_use_actual_file and not hash_from: return video @@ -810,18 +800,14 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski return video -def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"): +def _search_external_subtitles(path, languages=None, only_one=False, match_strictness="strict"): dirpath, filename = os.path.split(path) dirpath = dirpath or '.' fn_no_ext, fileext = os.path.splitext(filename) - fn_no_ext_lower = fn_no_ext.lower() + fn_no_ext_lower = unicodedata.normalize('NFC', fn_no_ext.lower()) subtitles = {} - _scandir = _scandir_generic if scandir_generic else scandir - for entry in _scandir(dirpath): - if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic: - logger.debug('Could not determine the name of the file, retrying with scandir_generic') - return _search_external_subtitles(path, languages, only_one, True) + for entry in scandir(dirpath): if not entry.is_file(follow_symlinks=False): continue @@ -860,9 +846,11 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen hi_tag = ["hi", "cc", "sdh"] hi = any(i for i in hi_tag if i in adv_tag) - #add simplified/traditional chinese detection - simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"] - traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"] + # add simplified/traditional chinese detection + simplified_chinese = ["chs", "sc", "zhs", "hans", "zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", + "中日双语", "中法双语", "简体&英文"] + traditional_chinese = ["cht", "tc", "zht", "hant", "zh-hant", "big5", "繁", "繁中", "繁体", "繁體", "繁体中文", + "繁體中文", "正體中文", "中英雙語", "中日雙語", "中法雙語", "繁体&英文"] p_root = p_root.replace('zh-TW', 'zht') # remove possible language code for matching @@ -884,11 +872,11 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen try: language_code = p_root.rsplit(".", 1)[1].replace('_', '-') try: - language = Language.fromietf(language_code) + language = Language.fromietf(language_code) language.forced = forced language.hi = hi except (ValueError, LanguageReverseError): - #add simplified/traditional chinese detection + # add simplified/traditional chinese detection if any(ext in str(language_code) for ext in simplified_chinese): language = Language.fromietf('zh') language.forced = forced @@ -901,7 +889,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen logger.error('Cannot parse language code %r', language_code) language_code = None except IndexError: - language_code = None + language_code = None if not language and not language_code and only_one: language = Language.rebuild(list(languages)[0], forced=forced, hi=hi) @@ -932,20 +920,15 @@ def search_external_subtitles(path, languages=None, only_one=False, match_strict logger.debug("external subs: scanning path %s", abspath) if os.path.isdir(os.path.dirname(abspath)): - try: - subtitles.update(_search_external_subtitles(abspath, languages=languages, - only_one=only_one, match_strictness=match_strictness)) - except OSError: - subtitles.update(_search_external_subtitles(abspath, languages=languages, - only_one=only_one, match_strictness=match_strictness, - scandir_generic=True)) + subtitles.update(_search_external_subtitles(abspath, languages=languages, only_one=only_one, + match_strictness=match_strictness)) logger.debug("external subs: found %s", subtitles) return subtitles def list_all_subtitles(videos, languages, **kwargs): """List all available subtitles. - + patch: remove video check, it has been done before The `videos` must pass the `languages` check of :func:`check_video`. @@ -1177,7 +1160,7 @@ def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=Non def refine(video, episode_refiners=None, movie_refiners=None, **kwargs): """Refine a video using :ref:`refiners`. - + patch: add traceback logging .. note::