commit 5d750596e1
Author: Louis Vézina
Date:   2019-05-29 23:45:51 -04:00

    Merge remote-tracking branch 'origin/development' into development

3 changed files with 48 additions and 15 deletions

View File

@@ -128,9 +128,8 @@ class SubdivxSubtitlesProvider(Provider):
                 return []

             page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
-            table_soup = page_soup.find("div", {'id': 'contenedor_izq'})
-            title_soups = table_soup.find_all("div", {'id': 'menu_detalle_buscador'})
-            body_soups = table_soup.find_all("div", {'id': 'buscador_detalle'})
+            title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
+            body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
             if len(title_soups) != len(body_soups):
                 logger.debug('Error in provider data')
                 return []
@@ -139,12 +138,12 @@ class SubdivxSubtitlesProvider(Provider):
                 # title
                 title = title_soup.find("a").text.replace("Subtitulo de ", "")
-                page_link = title_soup.find("a")["href"]
+                page_link = title_soup.find("a")["href"].replace('http://', 'https://')

                 # body
                 description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
-                tmp_div_soup = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'})
-                download_link = tmp_div_soup.find("a", {'target': 'new'})["href"]
+                download_link = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'}
+                                               ).find("a", {'target': 'new'})["href"].replace('http://', 'https://')

                 subtitle = self.subtitle_class(language, page_link, download_link, description, title)
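
Reviewer's note: the provider now pairs title/body rows positionally from the whole page instead of relying on the 'contenedor_izq' wrapper div, and rewrites scraped links to https. A minimal standalone sketch of that pattern (the markup below is made up, not subdivx's actual page):

    from bs4 import BeautifulSoup

    html = """
    <div id="menu_detalle_buscador"><a href="http://example.com/sub/1">Subtitulo de Show S01E01</a></div>
    <div id="buscador_detalle"><div id="buscador_detalle_sub">some description</div></div>
    """
    page_soup = BeautifulSoup(html, "html.parser")
    title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
    body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
    assert len(title_soups) == len(body_soups)  # the provider returns [] on mismatch

    for title_soup, body_soup in zip(title_soups, body_soups):
        # scheme upgrade, exactly as the new provider lines do
        page_link = title_soup.find("a")["href"].replace('http://', 'https://')
        print(page_link)  # https://example.com/sub/1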

View File

@@ -117,14 +117,14 @@ class Subtitle(Subtitle_):
         logger.info('Guessing encoding for language %s', self.language)
-        encodings = ['utf-8', 'utf-16']
+        encodings = ['utf-8']

         # add language-specific encodings
         # http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
         if self.language.alpha3 == 'zho':
             encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
-                              'big5hkscs'])
+                              'big5hkscs', 'utf-16'])
         elif self.language.alpha3 == 'jpn':
             encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
                               'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
@@ -133,7 +133,7 @@ class Subtitle(Subtitle_):
         # arabian/farsi
         elif self.language.alpha3 in ('ara', 'fas', 'per'):
-            encodings.append('windows-1256')
+            encodings.extend(['windows-1256', 'utf-16'])
         elif self.language.alpha3 == 'heb':
             encodings.extend(['windows-1255', 'iso-8859-8'])
         elif self.language.alpha3 == 'tur':
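
Reviewer's note: these lists feed a trial-decode loop further down in guess_encoding() (not part of this hunk). Demoting utf-16 out of the default list matters because utf-16 will "successfully" decode almost any even-length byte string, shadowing the correct single-byte encoding. A rough sketch of the consuming loop, with the chardet fallback assumed:

    import chardet  # assumed fallback, as in subliminal

    def guess_encoding(content, encodings):
        # try the candidate encodings in order; first clean decode wins
        for encoding in encodings:
            try:
                content.decode(encoding)
                return encoding
            except (UnicodeDecodeError, LookupError):
                pass
        # nothing matched cleanly: fall back to statistical detection
        return chardet.detect(content)['encoding']

    print(guess_encoding(u'\u05e9\u05dc\u05d5\u05dd'.encode('windows-1255'),
                         ['utf-8', 'windows-1255', 'iso-8859-8']))  # windows-1255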

View File

@@ -30,6 +30,7 @@
 import enum
 import sys
 import requests
 import time
+import logging
 is_PY2 = sys.version_info[0] < 3
 if is_PY2:
@@ -39,8 +40,13 @@
 else:
     from contextlib import suppress
     from urllib.request import Request, urlopen
+
+from dogpile.cache.api import NO_VALUE
+from subliminal.cache import region
 from bs4 import BeautifulSoup, NavigableString
+
+logger = logging.getLogger(__name__)

 # constants
 HEADERS = {
 }
@@ -50,6 +56,13 @@
 DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
                      "Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"

+ENDPOINT_RE = re.compile(ur'(?uis)<form action="/subtitles/(.+)">.*?<input type="text"')
+
+
+class NewEndpoint(Exception):
+    pass
+
+
 # utils
 def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
     url = re.sub("\s", "+", url)
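
Reviewer's note: ENDPOINT_RE drives the failover below. When subscene 404s the old search path, the new endpoint name is scraped from the search form on the error page. A quick check of the regex against a made-up 404 body (written with r'' rather than the Python-2-only ur'' prefix):

    import re

    ENDPOINT_RE = re.compile(r'(?uis)<form action="/subtitles/(.+)">.*?<input type="text"')

    fake_404_body = '''
    <form action="/subtitles/searching2">
        <input type="text" name="query">
    </form>
    '''
    m = ENDPOINT_RE.search(fake_404_body)
    print(m.group(1))  # searching2
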
@@ -58,7 +71,17 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
         html = urlopen(r).read().decode("utf-8")
     else:
         ret = session.get(url)
-        ret.raise_for_status()
+        try:
+            ret.raise_for_status()
+        except requests.HTTPError, e:
+            if e.response.status_code == 404:
+                m = ENDPOINT_RE.search(ret.text)
+                if m:
+                    try:
+                        raise NewEndpoint(m.group(1))
+                    except:
+                        pass
+            raise
         html = ret.text
     return BeautifulSoup(html, "html.parser")
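
Reviewer's note: the inner try/except looks like it swallows NewEndpoint, but this is a (fragile) Python 2 idiom: a bare raise re-raises whatever sys.exc_info() currently holds, i.e. the most recently caught exception. So the final raise propagates NewEndpoint when the regex matched, and the original HTTPError otherwise. Python 3 restores the outer exception when the inner handler exits, so this trick would not survive a port. A minimal demonstration:

    # Python 2 only: bare `raise` uses sys.exc_info(), which the inner
    # except leaves pointing at ValueError.
    def reraise_trick():
        try:
            1 / 0
        except ZeroDivisionError:
            try:
                raise ValueError("swapped in")
            except:
                pass
            raise  # ValueError on Python 2, ZeroDivisionError on Python 3

    try:
        reraise_trick()
    except ValueError as e:
        print("got: %s" % e)  # got: swapped in (on Python 2)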
@@ -250,20 +273,31 @@ def get_first_film(soup, section, year=None, session=None):

 def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
     # note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
     endpoints = ["searching", "search", "srch", "find"]
     if release:
         endpoints = ["release"]
+    else:
+        endpoint = region.get("subscene_endpoint")
+        if endpoint is not NO_VALUE and endpoint not in endpoints:
+            endpoints.insert(0, endpoint)

     soup = None
     for endpoint in endpoints:
         try:
             soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
                             session=session)
-        except requests.HTTPError, e:
-            if e.response.status_code == 404:
+        except NewEndpoint, e:
+            new_endpoint = e.message
+            if new_endpoint not in endpoints:
+                new_endpoint = new_endpoint.strip()
+                logger.debug("Switching main endpoint to %s", new_endpoint)
+                region.set("subscene_endpoint", new_endpoint)
                 time.sleep(throttle)
                 # fixme: detect endpoint from html
-                continue
-            return
+                return search(term, release=release, session=session, year=year, limit_to=limit_to, throttle=throttle)
+            else:
+                region.delete("subscene_endpoint")
+                raise Exception("New endpoint %s didn't work; exiting" % new_endpoint)
         break

     if soup:
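
Reviewer's note: the discovered endpoint is persisted through subliminal's dogpile cache region so later searches try it first, and search() recurses once to retry with it; if the cached endpoint itself triggers NewEndpoint again, it is dropped and the search aborts. A sketch of the list-priming logic with a stub region (FakeRegion is hypothetical, standing in for subliminal.cache.region):

    NO_VALUE = object()  # stand-in for dogpile.cache.api.NO_VALUE

    class FakeRegion(object):
        """Hypothetical stub for subliminal.cache.region (dogpile)."""
        def __init__(self):
            self._store = {}
        def get(self, key):
            return self._store.get(key, NO_VALUE)
        def set(self, key, value):
            self._store[key] = value
        def delete(self, key):
            self._store.pop(key, None)

    region = FakeRegion()
    region.set("subscene_endpoint", "searching2")  # as if a NewEndpoint fired earlier

    endpoints = ["searching", "search", "srch", "find"]
    endpoint = region.get("subscene_endpoint")
    if endpoint is not NO_VALUE and endpoint not in endpoints:
        endpoints.insert(0, endpoint)  # cached endpoint gets first shot
    print(endpoints)  # ['searching2', 'searching', 'search', 'srch', 'find']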