core: update to subliminal_patch:head; react to a nonexistent subscene endpoint if possible

This commit is contained in:
panni 2019-05-30 04:46:43 +02:00
parent 8c92599cbf
commit 3217a475f2
1 changed files with 40 additions and 6 deletions

View File

@ -30,6 +30,7 @@ import enum
import sys import sys
import requests import requests
import time import time
import logging
is_PY2 = sys.version_info[0] < 3 is_PY2 = sys.version_info[0] < 3
if is_PY2: if is_PY2:
@ -39,8 +40,13 @@ else:
from contextlib import suppress from contextlib import suppress
from urllib2.request import Request, urlopen from urllib2.request import Request, urlopen
from dogpile.cache.api import NO_VALUE
from subliminal.cache import region
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString
logger = logging.getLogger(__name__)
# constants # constants
HEADERS = { HEADERS = {
} }
@ -50,6 +56,13 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" "Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
# Extracts the current search endpoint name from the search form that the site
# renders (e.g. on its 404 page): <form action="/subtitles/NAME"> ... <input type="text"
#
# - Plain raw string instead of the ``ur''`` prefix: ``ur`` is a SyntaxError on
#   Python 3, while ``r''`` compiles on both Python 2 and 3.
# - ``[^"]+`` instead of a greedy ``.+``: with DOTALL enabled the greedy form
#   swallowed following markup whenever another ``">`` appeared between the
#   form tag and the text input, producing a bogus endpoint name.
ENDPOINT_RE = re.compile(r'(?uis)<form action="/subtitles/([^"]+)">.*?<input type="text"')
class NewEndpoint(Exception):
    """Signal that the site advertises a search endpoint we did not know about.

    The exception is constructed with the endpoint name as its single
    argument. That name is available as ``args[0]`` and as ``message``.

    ``message`` is set explicitly because the implicit ``Exception.message``
    attribute only exists on Python 2. It follows the Python 2 rule: it holds
    the argument when exactly one was given, and ``""`` otherwise.
    """

    def __init__(self, *args):
        super(NewEndpoint, self).__init__(*args)
        self.message = args[0] if len(args) == 1 else ""
# utils # utils
def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT): def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
url = re.sub("\s", "+", url) url = re.sub("\s", "+", url)
@ -58,7 +71,17 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
html = urlopen(r).read().decode("utf-8") html = urlopen(r).read().decode("utf-8")
else: else:
ret = session.get(url) ret = session.get(url)
try:
ret.raise_for_status() ret.raise_for_status()
except requests.HTTPError, e:
if e.response.status_code == 404:
m = ENDPOINT_RE.search(ret.text)
if m:
try:
raise NewEndpoint(m.group(1))
except:
pass
raise
html = ret.text html = ret.text
return BeautifulSoup(html, "html.parser") return BeautifulSoup(html, "html.parser")
@ -250,20 +273,31 @@ def get_first_film(soup, section, year=None, session=None):
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0): def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more # note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
endpoints = ["searching", "search", "srch", "find"] endpoints = ["searching", "search", "srch", "find"]
if release: if release:
endpoints = ["release"] endpoints = ["release"]
else:
endpoint = region.get("subscene_endpoint")
if endpoint is not NO_VALUE and endpoint not in endpoints:
endpoints.insert(0, endpoint)
soup = None soup = None
for endpoint in endpoints: for endpoint in endpoints:
try: try:
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term), soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
session=session) session=session)
except requests.HTTPError, e:
if e.response.status_code == 404: except NewEndpoint, e:
new_endpoint = e.message
if new_endpoint not in endpoints:
new_endpoint = new_endpoint.strip()
logger.debug("Switching main endpoint to %s", new_endpoint)
region.set("subscene_endpoint", new_endpoint)
time.sleep(throttle) time.sleep(throttle)
# fixme: detect endpoint from html return search(term, release=release, session=session, year=year, limit_to=limit_to, throttle=throttle)
continue else:
return region.delete("subscene_endpoint")
raise Exception("New endpoint %s didn't work; exiting" % new_endpoint)
break break
if soup: if soup: