Merge remote-tracking branch 'origin/development' into development

2020-10-05 11:20:39 -04:00 · 2020-10-05 11:20:39 -04:00 · 543b430862
parent 22172c37a9 198be1cc6e
commit 543b430862
2 changed files with 115 additions and 107 deletions
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -9,11 +9,6 @@ import zipfile
 import rarfile
 from subzero.language import Language
 from requests import Session
-from six import PY2
-if PY2:
-    from urlparse import urlparse
-else:
-    from urllib.parse import urlparse

 from subliminal import __short_version__
 from subliminal.exceptions import ServiceUnavailable
@@ -74,22 +69,25 @@ class SubdivxSubtitle(Subtitle):
            formats = [video.source.lower()]
            if formats[0] == "web":
                formats.append("webdl")
+                formats.append("web-dl")
                formats.append("webrip")
                formats.append("web ")
            for frmt in formats:
-                if frmt.lower() in self.description:
+                if frmt in self.description:
                    matches.add('source')
                    break

        # video_codec
        if video.video_codec:
            video_codecs = [video.video_codec.lower()]
-            if video_codecs[0] == "H.264":
-                formats.append("x264")
-            elif video_codecs[0] == "H.265":
-                formats.append("x265")
-            for vc in formats:
-                if vc.lower() in self.description:
+            if video_codecs[0] == "h.264":
+                video_codecs.append("h264")
+                video_codecs.append("x264")
+            elif video_codecs[0] == "h.265":
+                video_codecs.append("h265")
+                video_codecs.append("x265")
+            for vc in video_codecs:
+                if vc in self.description:
                    matches.add('video_codec')
                    break

@@ -99,7 +97,7 @@ class SubdivxSubtitle(Subtitle):
 class SubdivxSubtitlesProvider(Provider):
    provider_name = 'subdivx'
    hash_verifiable = False
-    languages = {Language.fromalpha2(l) for l in ['es']}
+    languages = {Language.fromalpha2(lang) for lang in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
@@ -117,7 +115,6 @@ class SubdivxSubtitlesProvider(Provider):
        self.session.close()

    def query(self, video, languages):
-        
        if isinstance(video, Episode):
            query = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode)
        else:
@@ -179,14 +176,10 @@ class SubdivxSubtitlesProvider(Provider):
            subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

-    def _check_response(self, response):
-        if response.status_code != 200:
-            raise ServiceUnavailable('Bad status code: ' + str(response.status_code))
-
    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

-        page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
+        page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

@@ -215,7 +208,7 @@ class SubdivxSubtitlesProvider(Provider):
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
-            page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
+            page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
@@ -229,7 +222,13 @@ class SubdivxSubtitlesProvider(Provider):

        raise APIThrottled('Download link not found')

-    def _get_archive(self, content):
+    @staticmethod
+    def _check_response(response):
+        if response.status_code != 200:
+            raise ServiceUnavailable('Bad status code: ' + str(response.status_code))
+
+    @staticmethod
+    def _get_archive(content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
@@ -243,35 +242,47 @@ class SubdivxSubtitlesProvider(Provider):

        return archive

-    def _get_subtitle_from_archive(self, archive, subtitle):
-        _max_score = 0
-        _scores = get_scores (subtitle.video)
-
+    @staticmethod
+    def _get_subtitle_from_archive(archive, subtitle):
+        _valid_names = []
        for name in archive.namelist():
            # discard hidden files
-            if os.path.split(name)[-1].startswith('.'):
-                continue
-
            # discard non-subtitle files
-            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
-                continue
+            if not os.path.split(name)[-1].startswith('.') and name.lower().endswith(SUBTITLE_EXTENSIONS):
+                _valid_names.append(name)
+
+        # archive with only 1 subtitle
+        if len(_valid_names) == 1:
+            logger.debug("returning from archive: {} (single subtitle file)".format(_valid_names[0]))
+            return archive.read(_valid_names[0])
+
+        # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
+        _scores = get_scores(subtitle.video)
+        _max_score = 0
+        _max_name = ""
+        for name in _valid_names:
+            _guess = guessit(name)
+            if 'season' not in _guess:
+                _guess['season'] = -1
+            if 'episode' not in _guess:
+                _guess['episode'] = -1

-            _guess = guessit (name)
            if isinstance(subtitle.video, Episode):
-                logger.debug ("guessing %s" % name)
-                logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode))
+                logger.debug("guessing %s" % name)
+                logger.debug("subtitle S{}E{} video S{}E{}".format(
+                    _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode))

                if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
-            matches |= guess_matches (subtitle.video, _guess)
-            _score = sum ((_scores.get (match, 0) for match in matches))
+            matches |= guess_matches(subtitle.video, _guess)
+            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug('srt matches: %s, score %d' % (matches, _score))
            if _score > _max_score:
-                _max_name = name
                _max_score = _score
+                _max_name = name
                logger.debug("new max: {} {}".format(name, _score))

        if _max_score > 0:
--- a/libs/subliminal_patch/providers/subtitulamostv.py
+++ b/libs/subliminal_patch/providers/subtitulamostv.py
@@ -1,75 +1,69 @@
 # -*- coding: utf-8 -*-
-import json
 import logging
 import os
-import re
-import io

-from babelfish import language_converters
-from guessit import guessit
 from requests import Session
 from subzero.language import Language

 from subliminal import Movie, Episode, ProviderError, __short_version__
-from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError
 from subliminal_patch.subtitle import Subtitle, guess_matches
+from subliminal.providers import ParserBeautifulSoup
 from subliminal.subtitle import fix_line_ending, SUBTITLE_EXTENSIONS
 from subliminal_patch.providers import Provider

 logger = logging.getLogger(__name__)

-server_url = 'https://subtitulamos.tv/'
-

 class SubtitulamosTVSubtitle(Subtitle):
    provider_name = 'subtitulamostv'
    hash_verifiable = False

-    def __init__(self, language, page_link, download_link, description, title, matches, release_info):
-        super(SubtitulamosTVSubtitle, self).__init__(language, hearing_impaired=False,
-                                              page_link=page_link)
+    def __init__(self, language, page_link, download_link, title, release_info):
+        super(SubtitulamosTVSubtitle, self).__init__(language, hearing_impaired=False, page_link=page_link)
        self.download_link = download_link
-        self.description = description.lower()
        self.title = title
        self.release_info = release_info
-        self.found_matches = matches

    @property
    def id(self):
        return self.download_link

    def get_matches(self, video):
-        matches = self.found_matches
+        matches = {'series', 'season', 'episode', 'year'}

-        # release_group
-        if video.release_group and video.release_group.lower() in self.description:
+        title_lower = self.title.lower()
+        release_info_lower = self.release_info.lower()
+
+        if video.title and video.title.lower() in title_lower:
+            matches.add('title')
+
+        if video.release_group and video.release_group.lower() in release_info_lower:
            matches.add('release_group')

-        # resolution
-        if video.resolution and video.resolution.lower() in self.description:
+        if video.resolution and video.resolution.lower() in release_info_lower:
            matches.add('resolution')

-        # source
        if video.source:
            formats = [video.source.lower()]
            if formats[0] == "web":
                formats.append("webdl")
+                formats.append("web-dl")
                formats.append("webrip")
-                formats.append("web ")
            for frmt in formats:
-                if frmt.lower() in self.description:
+                if frmt in release_info_lower:
                    matches.add('source')
                    break

-        # video_codec
        if video.video_codec:
            video_codecs = [video.video_codec.lower()]
-            if video_codecs[0] == "H.264":
-                formats.append("x264")
-            elif video_codecs[0] == "H.265":
-                formats.append("x265")
-            for vc in formats:
-                if vc.lower() in self.description:
+            if video_codecs[0] == "h.264":
+                video_codecs.append("h264")
+                video_codecs.append("x264")
+            elif video_codecs[0] == "h.265":
+                video_codecs.append("h265")
+                video_codecs.append("x265")
+            for vc in video_codecs:
+                if vc in release_info_lower:
                    matches.add('video_codec')
                    break

@@ -78,9 +72,14 @@ class SubtitulamosTVSubtitle(Subtitle):

 class SubtitulamosTVProvider(Provider):
    """Subtitulamostv Provider"""
-    languages = {Language.fromietf(l) for l in ['en','es']}
+    languages = {Language.fromietf(lang) for lang in ['en', 'es']}
    video_types = (Episode,)

+    server_url = 'https://subtitulamos.tv'
+
+    def __init__(self):
+        self.session = None
+
    def initialize(self):
        self.session = Session()
        self.session.headers = {
@@ -90,58 +89,56 @@ class SubtitulamosTVProvider(Provider):
        self.session.close()

    def query(self, languages, video):
-        # query the server
-        result = None
-        year = (" (%d)" % video.year) if video.year else ""
-        q = "%s%s %dx%02d" % (video.series, year, video.season, video.episode)
-        logger.debug('Searching subtitles "%s"', q)
+        subtitle_name = "%s %dx%02d" % (video.series, video.season, video.episode)
+        logger.debug('Searching subtitles "%s"' % subtitle_name)

-        res = self.session.get(
-            server_url + 'search/query', params={'q':q}, timeout=10)
-        res.raise_for_status()
-        result = res.json()
+        response = self.session.get(self.server_url + '/search/query', params={'q': video.series}, timeout=10)
+        response.raise_for_status()
+        result = response.json()

        subtitles = []
-        for s in [s for s in result if len(s['episodes'])]:
-            for e in s['episodes']:
-                res = self.session.get(
-                    server_url + 'episodes/%d' % e['id'], timeout=10)
-                res.raise_for_status()
-                html = res.text
-                for lang_m in re.finditer(r"<div class=\"subtitle_language\">(.*?)<\/div>.*?(?=<div class=\"subtitle_language\">|<div id=\"subtitle-actions\">)", html, re.S):
-                    lang = lang_m.group(1)
-                    language = "es"
-                    if "English" in lang:
+        for serie in result:
+            # skip non-matching series
+            if video.series.lower() != serie['name'].lower():
+                continue
+
+            response = self.session.get(self.server_url + "/shows/%d/season/%d" % (serie['id'], video.season),
+                                        timeout=10)
+            response.raise_for_status()
+            soup = ParserBeautifulSoup(response.text, ['lxml', 'html.parser'])
+
+            for episode in soup.select('div.episode'):
+                episode_soup = episode.find('a')
+                episode_name = episode_soup.text
+                episode_url = episode_soup['href']
+
+                # skip non-matching episodes
+                if subtitle_name.lower() not in episode_name.lower():
+                    continue
+
+                for lang in episode.select("div.subtitle-language"):
+                    if "English" in lang.text:
                        language = "en"
+                    elif "Español" in lang.text:
+                        language = "es"
+                    else:
+                        continue  # not supported yet
                    logger.debug('Found subtitles in "%s" language.', language)

-                    for subt_m in re.finditer(r"<div class=\"version_name\">(.*?)</div>.*?<a href=\"/(subtitles/\d+/download)\" rel=\"nofollow\">(?:.*?<div class=\"version_comments ?\">.*?</i>(.*?)</p>)?", lang_m.group(0), re.S):
-                        matches = set()
-                        if video.alternative_series is None:
-                            if video.series.lower() == s['name'].lower():
-                                matches.add('series')
-                        elif s['name'].lower() in [video.series.lower()]+list(map(lambda name: name.lower(), video.alternative_series)):
-                            matches.add('series')
-                        if video.season == e['season']:
-                            matches.add('season')
-                        if video.episode == e['number']:
-                            matches.add('episode')
-                        if video.title == e['name']:
-                            matches.add('title')
-                        #if video.year is None or ("(%d)" % video.year) in s['name']:
-                        matches.add('year')
+                    for release in lang.find_next_sibling("div").select("div.sub"):
+                        release_name = release.select('div.version-name')[0].text
+                        release_url = release.select('a[href*="/download"]')[0]['href']
+
                        subtitles.append(
                            SubtitulamosTVSubtitle(
-                                Language.fromietf(language), 
-                                server_url + 'episodes/%d' % e['id'], 
-                                server_url + subt_m.group(2),
-                                subt_m.group(1)+(subt_m.group(3) if not subt_m.group(3) is None else ""), 
-                                e['name'], 
-                                matches,
-                                '%s %dx%d,%s,%s' % (s['name'], e['season'], e['number'], subt_m.group(1), lang_m.group(1)), 
+                                Language.fromietf(language),
+                                self.server_url + episode_url,
+                                self.server_url + release_url,
+                                episode_name,
+                                release_name
                            )
                        )
-                        
+
        return subtitles

    def list_subtitles(self, video, languages):