core: update subliminal_patch to 2.6.4.2917-dev; fix addic7ed, subscene, titlovi; fix SSAStyle parsing in SRT

2019-01-15 13:39:34 +01:00 · 2019-01-15 13:39:34 +01:00 · 8879f5a82e
parent d896599417
commit 8879f5a82e
12 changed files with 3900 additions and 58 deletions
--- a/libs/inflect.py
+++ b/libs/inflect.py
--- a/libs/pysubs2/ssastyle.py
+++ b/libs/pysubs2/ssastyle.py
@@ -56,7 +56,7 @@ class SSAStyle(object):
        self.encoding = 1 #: Charset

        for k, v in fields.items():
-            if k in self.FIELDS:
+            if k in self.FIELDS and v is not None:
                setattr(self, k, v)
            else:
                raise ValueError("SSAStyle has no field named %r" % k)
--- a/libs/pysubs2/substation.py
+++ b/libs/pysubs2/substation.py
@@ -150,7 +150,14 @@ class SubstationFormat(FormatBase):
                if format_ == "ass":
                    return ass_rgba_to_color(v)
                else:
-                    return ssa_rgb_to_color(v)
+                    try:
+                        return ssa_rgb_to_color(v)
+                    except ValueError:
+                        try:
+                            return ass_rgba_to_color(v)
+                        except:
+                            return Color(255, 255, 255, 0)
+
            elif f in {"bold", "underline", "italic", "strikeout"}:
                return v == "-1"
            elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -493,7 +493,7 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
        raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])

    dirpath, filename = os.path.split(path)
-    logger.info('Scanning video %r in %r', filename, dirpath)
+    logger.info('Determining basic video properties for %r in %r', filename, dirpath)

    # hint guessit the filename itself and its 2 parent directories if we're an episode (most likely
    # Series name/Season/filename), else only one
--- a/libs/subliminal_patch/providers/addic7ed.py
+++ b/libs/subliminal_patch/providers/addic7ed.py
@@ -84,32 +84,35 @@ class Addic7edProvider(_Addic7edProvider):
        # login
        if self.username and self.password:
            ccks = region.get("addic7ed_cookies", expiration_time=86400)
-            do_login = False
            if ccks != NO_VALUE:
-                self.session.cookies.update(ccks)
-                r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
-                if r.status_code == 302:
-                    logger.info('Addic7ed: Login expired')
-                    do_login = True
-                else:
-                    logger.info('Addic7ed: Reusing old login')
-                    self.logged_in = True
+                try:
+                    self.session.cookies._cookies.update(ccks)
+                    r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
+                    if r.status_code == 302:
+                        logger.info('Addic7ed: Login expired')
+                        region.delete("addic7ed_cookies")
+                    else:
+                        logger.info('Addic7ed: Reusing old login')
+                        self.logged_in = True
+                        return
+                except:
+                    pass

-            if do_login:
-                logger.info('Addic7ed: Logging in')
-                data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
-                r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
+            logger.info('Addic7ed: Logging in')
+            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
+            r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
+                                  headers={"Referer": self.server_url + "login.php"})

-                if "relax, slow down" in r.content:
-                    raise TooManyRequests(self.username)
+            if "relax, slow down" in r.content:
+                raise TooManyRequests(self.username)

-                if r.status_code != 302:
-                    raise AuthenticationError(self.username)
+            if r.status_code != 302:
+                raise AuthenticationError(self.username)

-                region.set("addic7ed_cookies", r.cookies)
+            region.set("addic7ed_cookies", self.session.cookies._cookies)

-                logger.debug('Addic7ed: Logged in')
-                self.logged_in = True
+            logger.debug('Addic7ed: Logged in')
+            self.logged_in = True


    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
--- a/libs/subliminal_patch/providers/podnapisi.py
+++ b/libs/subliminal_patch/providers/podnapisi.py
@@ -18,7 +18,6 @@ except ImportError:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree
-
 from babelfish import language_converters
 from subliminal import Episode
 from subliminal import Movie
--- a/libs/subliminal_patch/providers/subscene.py
+++ b/libs/subliminal_patch/providers/subscene.py
@@ -4,6 +4,7 @@ import io
 import logging
 import os
 import time
+import inflect

 from random import randint
 from zipfile import ZipFile
@@ -20,6 +21,8 @@ from subliminal_patch.converters.subscene import language_ids, supported_languag
 from subscene_api.subscene import search, Subtitle as APISubtitle
 from subzero.language import Language

+p = inflect.engine()
+

 language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
 logger = logging.getLogger(__name__)
@@ -192,21 +195,27 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):

    def query(self, video):
        vfn = get_video_filename(video)
+        subtitles = []
        logger.debug(u"Searching for: %s", vfn)
        film = search(vfn, session=self.session)
-
-        subtitles = []
        if film and film.subtitles:
+            logger.debug('Release results found: %s', len(film.subtitles))
            subtitles = self.parse_results(video, film)
+        else:
+            logger.debug('No release results found')

        # re-search for episodes without explicit release name
        if isinstance(video, Episode):
-            term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
+            #term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
+            term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
            time.sleep(self.search_throttle)
            logger.debug('Searching for alternative results: %s', term)
-            film = search(term, session=self.session)
+            film = search(term, session=self.session, release=False)
            if film and film.subtitles:
+                logger.debug('Alternative results found: %s', len(film.subtitles))
                subtitles += self.parse_results(video, film)
+            else:
+                logger.debug('No alternative results found')

            # packs
            if video.season_fully_aired:
@@ -215,9 +224,17 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
                time.sleep(self.search_throttle)
                film = search(term, session=self.session)
                if film and film.subtitles:
+                    logger.debug('Pack results found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
+                else:
+                    logger.debug('No pack results found')
            else:
                logger.debug("Not searching for packs, because the season hasn't fully aired")
+        else:
+            logger.debug('Searching for movie results: %s', video.title)
+            film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
+            if film and film.subtitles:
+                subtitles += self.parse_results(video, film)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
--- a/libs/subliminal_patch/providers/titlovi.py
+++ b/libs/subliminal_patch/providers/titlovi.py
@@ -134,8 +134,8 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):

    def initialize(self):
        self.session = Session()
-        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \
-                                             '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
+        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3)' \
+                                             'Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)'
        logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
        self.session.headers['Referer'] = self.server_url
        logger.debug('Referer set to %s', self.session.headers['Referer'])
@@ -202,7 +202,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
                current_page = int(params['pg'])

            try:
-                sublist = soup.select('section.titlovi > ul.titlovi > li')
+                sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                for sub in sublist:
                    # subtitle id
                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
--- a/libs/subscene_api/subscene.py
+++ b/libs/subscene_api/subscene.py
@@ -25,6 +25,7 @@ this script that does the job by parsing the website"s pages.

 # imports
 import re
+
 import enum
 import sys

@@ -36,7 +37,7 @@ else:
    from contextlib import suppress
    from urllib2.request import Request, urlopen

-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, NavigableString

 # constants
 HEADERS = {
@@ -207,7 +208,7 @@ def section_exists(soup, section):
    return False


-def get_first_film(soup, section, session=None):
+def get_first_film(soup, section, year=None, session=None):
    tag_part = SectionsParts[section]
    tag = None

@@ -220,12 +221,26 @@ def get_first_film(soup, section, session=None):
    if not tag:
        return

-    url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
+    url = None
+
+    if not year:
+        url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
+    else:
+        for t in tag.findNext("ul").findAll("li"):
+            if isinstance(t, NavigableString) or not t.div:
+                continue
+
+            if str(year) in t.div.a.string:
+                url = SITE_DOMAIN + t.div.a.get("href")
+                break
+        if not url:
+            return
+
    return Film.from_url(url, session=session)


-def search(term, session=None, limit_to=SearchTypes.Exact):
-    soup = soup_for("%s/subtitles/title?q=%s" % (SITE_DOMAIN, term), session=session)
+def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
+    soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session)

    if "Subtitle search by" in str(soup):
        rows = soup.find("table").tbody.find_all("tr")
@@ -234,7 +249,7 @@ def search(term, session=None, limit_to=SearchTypes.Exact):

    for junk, search_type in SearchTypes.__members__.items():
        if section_exists(soup, search_type):
-            return get_first_film(soup, search_type)
+            return get_first_film(soup, search_type, year=year, session=session)

        if limit_to == search_type:
            return
--- a/libs/subzero/language.py
+++ b/libs/subzero/language.py
@@ -4,7 +4,6 @@ import types
 from babelfish.exceptions import LanguageError
 from babelfish import Language as Language_, basestr

-
 repl_map = {
    "dk": "da",
    "nld": "nl",
--- a/libs/subzero/modification/mods/common.py
+++ b/libs/subzero/modification/mods/common.py
@@ -28,13 +28,16 @@ class CommonFixes(SubtitleTextModification):
        NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1—", name="CM_multidash"),

        # line = _/-/\s
-        NReProcessor(re.compile(r'(?u)(^\W*[-_.:]+\W*$)'), "", name="CM_non_word_only"),
+        NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
+
+        # remove >>
+        NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),

        # line = : text
        NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),

        # fix music symbols
-        NReProcessor(re.compile(ur'(?u)(^[-\s]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
+        NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
                     lambda x: u"♪ " if x.group(1) else u" ♪",
                     name="CM_music_symbols"),

@@ -85,9 +88,6 @@ class CommonFixes(SubtitleTextModification):

        # space before ending doublequote?

-        # remove >>
-        NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
-
        # replace uppercase I with lowercase L in words
        NReProcessor(re.compile(ur'(?u)([a-zà-ž]+)(I+)'),
                     lambda match: ur'%s%s' % (match.group(1), "l" * len(match.group(2))),
--- a/libs/subzero/modification/mods/hearing_impaired.py
+++ b/libs/subzero/modification/mods/hearing_impaired.py
@@ -29,6 +29,22 @@ class HearingImpaired(SubtitleTextModification):
        FullBracketEntryProcessor(re.compile(ur'(?sux)^-?%(t)s[([].+(?=[^)\]]{3,}).+[)\]]%(t)s$' % {"t": TAG}),
                                  "", name="HI_brackets_full"),

+        # uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
+        # possibly with a dash in front; ignore anything ending with a quote
+        NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
+                                ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
+                     name="HI_before_colon_caps"),
+
+        # any text before colon (at least 3 chars); at start or after a sentence,
+        # possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
+        # a space is inside the text; ignore anything ending with a quote
+        NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s\->~]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
+                                ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\]]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
+                     lambda match:
+                     match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
+                     else "" if not match.group(1).startswith(" ") else " ",
+                     name="HI_before_colon_noncaps"),
+
        # brackets (only remove if at least 3 chars in brackets)
        NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
                                {"t": TAG}), "", name="HI_brackets"),
@@ -46,21 +62,6 @@ class HearingImpaired(SubtitleTextModification):
        #NReProcessor(re.compile(ur'(?u)(\b|^)([\s-]*(?=[A-zÀ-ž-_0-9"\']{3,})[A-zÀ-ž-_0-9"\']+:\s*)'), "",
        #             name="HI_before_colon"),

-        # uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
-        # possibly with a dash in front; ignore anything ending with a quote
-        NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s-]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
-                                ur'[A-ZÀ-Ž-_0-9\s\"\'&+]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
-                     name="HI_before_colon_caps"),
-
-        # any text before colon (at least 3 chars); at start or after a sentence,
-        # possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
-        # a space is inside the text; ignore anything ending with a quote
-        NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s-]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
-                                ur'[A-zÀ-ž-_0-9\s\"\'&+]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
-                     lambda match:
-                     match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
-                     else "" if not match.group(1).startswith(" ") else " ",
-                     name="HI_before_colon_noncaps"),

        # text in brackets at start, after optional dash, before colon or at end of line
        # fixme: may be too aggressive