mirror of https://github.com/morpheus65535/bazarr
Merge remote-tracking branch 'origin/development' into development
This commit is contained in:
commit
5d750596e1
|
@ -128,9 +128,8 @@ class SubdivxSubtitlesProvider(Provider):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
|
page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
|
||||||
table_soup = page_soup.find("div", {'id': 'contenedor_izq'})
|
title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
|
||||||
title_soups = table_soup.find_all("div", {'id': 'menu_detalle_buscador'})
|
body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
|
||||||
body_soups = table_soup.find_all("div", {'id': 'buscador_detalle'})
|
|
||||||
if len(title_soups) != len(body_soups):
|
if len(title_soups) != len(body_soups):
|
||||||
logger.debug('Error in provider data')
|
logger.debug('Error in provider data')
|
||||||
return []
|
return []
|
||||||
|
@ -139,12 +138,12 @@ class SubdivxSubtitlesProvider(Provider):
|
||||||
|
|
||||||
# title
|
# title
|
||||||
title = title_soup.find("a").text.replace("Subtitulo de ", "")
|
title = title_soup.find("a").text.replace("Subtitulo de ", "")
|
||||||
page_link = title_soup.find("a")["href"]
|
page_link = title_soup.find("a")["href"].replace('http://', 'https://')
|
||||||
|
|
||||||
# body
|
# body
|
||||||
description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
|
description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
|
||||||
tmp_div_soup = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'})
|
download_link = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'}
|
||||||
download_link = tmp_div_soup.find("a", {'target': 'new'})["href"]
|
).find("a", {'target': 'new'})["href"].replace('http://', 'https://')
|
||||||
|
|
||||||
subtitle = self.subtitle_class(language, page_link, download_link, description, title)
|
subtitle = self.subtitle_class(language, page_link, download_link, description, title)
|
||||||
|
|
||||||
|
|
|
@ -117,14 +117,14 @@ class Subtitle(Subtitle_):
|
||||||
|
|
||||||
logger.info('Guessing encoding for language %s', self.language)
|
logger.info('Guessing encoding for language %s', self.language)
|
||||||
|
|
||||||
encodings = ['utf-8', 'utf-16']
|
encodings = ['utf-8']
|
||||||
|
|
||||||
# add language-specific encodings
|
# add language-specific encodings
|
||||||
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
|
# http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
|
||||||
|
|
||||||
if self.language.alpha3 == 'zho':
|
if self.language.alpha3 == 'zho':
|
||||||
encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
|
encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
|
||||||
'big5hkscs'])
|
'big5hkscs', 'utf-16'])
|
||||||
elif self.language.alpha3 == 'jpn':
|
elif self.language.alpha3 == 'jpn':
|
||||||
encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
|
encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
|
||||||
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
|
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
|
||||||
|
@ -133,7 +133,7 @@ class Subtitle(Subtitle_):
|
||||||
|
|
||||||
# arabian/farsi
|
# arabian/farsi
|
||||||
elif self.language.alpha3 in ('ara', 'fas', 'per'):
|
elif self.language.alpha3 in ('ara', 'fas', 'per'):
|
||||||
encodings.append('windows-1256')
|
encodings.extend(['windows-1256', 'utf-16'])
|
||||||
elif self.language.alpha3 == 'heb':
|
elif self.language.alpha3 == 'heb':
|
||||||
encodings.extend(['windows-1255', 'iso-8859-8'])
|
encodings.extend(['windows-1255', 'iso-8859-8'])
|
||||||
elif self.language.alpha3 == 'tur':
|
elif self.language.alpha3 == 'tur':
|
||||||
|
|
|
@ -30,6 +30,7 @@ import enum
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
import logging
|
||||||
|
|
||||||
is_PY2 = sys.version_info[0] < 3
|
is_PY2 = sys.version_info[0] < 3
|
||||||
if is_PY2:
|
if is_PY2:
|
||||||
|
@ -39,8 +40,13 @@ else:
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from urllib2.request import Request, urlopen
|
from urllib2.request import Request, urlopen
|
||||||
|
|
||||||
|
from dogpile.cache.api import NO_VALUE
|
||||||
|
from subliminal.cache import region
|
||||||
from bs4 import BeautifulSoup, NavigableString
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# constants
|
# constants
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
}
|
}
|
||||||
|
@ -50,6 +56,13 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
|
||||||
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
|
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
|
||||||
|
|
||||||
|
|
||||||
|
ENDPOINT_RE = re.compile(ur'(?uis)<form action="/subtitles/(.+)">.*?<input type="text"')
|
||||||
|
|
||||||
|
|
||||||
|
class NewEndpoint(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# utils
|
# utils
|
||||||
def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
|
def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
|
||||||
url = re.sub("\s", "+", url)
|
url = re.sub("\s", "+", url)
|
||||||
|
@ -58,7 +71,17 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
|
||||||
html = urlopen(r).read().decode("utf-8")
|
html = urlopen(r).read().decode("utf-8")
|
||||||
else:
|
else:
|
||||||
ret = session.get(url)
|
ret = session.get(url)
|
||||||
|
try:
|
||||||
ret.raise_for_status()
|
ret.raise_for_status()
|
||||||
|
except requests.HTTPError, e:
|
||||||
|
if e.response.status_code == 404:
|
||||||
|
m = ENDPOINT_RE.search(ret.text)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
raise NewEndpoint(m.group(1))
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
html = ret.text
|
html = ret.text
|
||||||
return BeautifulSoup(html, "html.parser")
|
return BeautifulSoup(html, "html.parser")
|
||||||
|
|
||||||
|
@ -250,20 +273,31 @@ def get_first_film(soup, section, year=None, session=None):
|
||||||
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
|
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
|
||||||
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
|
# note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
|
||||||
endpoints = ["searching", "search", "srch", "find"]
|
endpoints = ["searching", "search", "srch", "find"]
|
||||||
|
|
||||||
if release:
|
if release:
|
||||||
endpoints = ["release"]
|
endpoints = ["release"]
|
||||||
|
else:
|
||||||
|
endpoint = region.get("subscene_endpoint")
|
||||||
|
if endpoint is not NO_VALUE and endpoint not in endpoints:
|
||||||
|
endpoints.insert(0, endpoint)
|
||||||
|
|
||||||
soup = None
|
soup = None
|
||||||
for endpoint in endpoints:
|
for endpoint in endpoints:
|
||||||
try:
|
try:
|
||||||
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
|
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
|
||||||
session=session)
|
session=session)
|
||||||
except requests.HTTPError, e:
|
|
||||||
if e.response.status_code == 404:
|
except NewEndpoint, e:
|
||||||
|
new_endpoint = e.message
|
||||||
|
if new_endpoint not in endpoints:
|
||||||
|
new_endpoint = new_endpoint.strip()
|
||||||
|
logger.debug("Switching main endpoint to %s", new_endpoint)
|
||||||
|
region.set("subscene_endpoint", new_endpoint)
|
||||||
time.sleep(throttle)
|
time.sleep(throttle)
|
||||||
# fixme: detect endpoint from html
|
return search(term, release=release, session=session, year=year, limit_to=limit_to, throttle=throttle)
|
||||||
continue
|
else:
|
||||||
return
|
region.delete("subscene_endpoint")
|
||||||
|
raise Exception("New endpoint %s didn't work; exiting" % new_endpoint)
|
||||||
break
|
break
|
||||||
|
|
||||||
if soup:
|
if soup:
|
||||||
|
|
Loading…
Reference in New Issue