From 26ef69d27bba567de94ba1322d26ae6156fb828b Mon Sep 17 00:00:00 2001 From: ngosang Date: Sat, 25 May 2019 23:16:24 +0200 Subject: [PATCH 1/3] Fix Subdivx provider with html.parser --- libs/subliminal_patch/providers/subdivx.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index fb4e268f0..87f9ce1e9 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -128,9 +128,8 @@ class SubdivxSubtitlesProvider(Provider): return [] page_soup = ParserBeautifulSoup(r.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser']) - table_soup = page_soup.find("div", {'id': 'contenedor_izq'}) - title_soups = table_soup.find_all("div", {'id': 'menu_detalle_buscador'}) - body_soups = table_soup.find_all("div", {'id': 'buscador_detalle'}) + title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'}) + body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'}) if len(title_soups) != len(body_soups): logger.debug('Error in provider data') return [] @@ -139,12 +138,12 @@ class SubdivxSubtitlesProvider(Provider): # title title = title_soup.find("a").text.replace("Subtitulo de ", "") - page_link = title_soup.find("a")["href"] + page_link = title_soup.find("a")["href"].replace('http://', 'https://') # body description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text - tmp_div_soup = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'}) - download_link = tmp_div_soup.find("a", {'target': 'new'})["href"] + download_link = body_soup.find("div", {'id': 'buscador_detalle_sub_datos'} + ).find("a", {'target': 'new'})["href"].replace('http://', 'https://') subtitle = self.subtitle_class(language, page_link, download_link, description, title) From 8c92599cbf139ed11b9bf011fbf2554c2afca1a3 Mon Sep 17 00:00:00 2001 From: panni Date: Mon, 27 May 2019 12:33:39 +0200 Subject: [PATCH 2/3] 
core: update to subliminal_patch:head; #38 --- libs/subliminal_patch/subtitle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py index 6b2263e59..057be546a 100644 --- a/libs/subliminal_patch/subtitle.py +++ b/libs/subliminal_patch/subtitle.py @@ -117,14 +117,14 @@ class Subtitle(Subtitle_): logger.info('Guessing encoding for language %s', self.language) - encodings = ['utf-8', 'utf-16'] + encodings = ['utf-8'] # add language-specific encodings # http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages if self.language.alpha3 == 'zho': encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5', - 'big5hkscs']) + 'big5hkscs', 'utf-16']) elif self.language.alpha3 == 'jpn': encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ]) @@ -133,7 +133,7 @@ class Subtitle(Subtitle_): # arabian/farsi elif self.language.alpha3 in ('ara', 'fas', 'per'): - encodings.append('windows-1256') + encodings.extend(['windows-1256', 'utf-16']) elif self.language.alpha3 == 'heb': encodings.extend(['windows-1255', 'iso-8859-8']) elif self.language.alpha3 == 'tur': From 3217a475f2c15e87b99e950d824a2d1c9eb328d0 Mon Sep 17 00:00:00 2001 From: panni Date: Thu, 30 May 2019 04:46:43 +0200 Subject: [PATCH 3/3] core: update to subliminal_patch:head; react to nonexistent subscene endpoint if possible --- libs/subscene_api/subscene.py | 46 ++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/libs/subscene_api/subscene.py b/libs/subscene_api/subscene.py index c8450518a..823e379fe 100644 --- a/libs/subscene_api/subscene.py +++ b/libs/subscene_api/subscene.py @@ -30,6 +30,7 @@ import enum import sys import requests import time +import logging is_PY2 = sys.version_info[0] < 3 if is_PY2: @@ -39,8 +40,13 @@ 
else: from contextlib import suppress from urllib2.request import Request, urlopen +from dogpile.cache.api import NO_VALUE +from subliminal.cache import region from bs4 import BeautifulSoup, NavigableString + +logger = logging.getLogger(__name__) + # constants HEADERS = { } @@ -50,6 +56,13 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\ "Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" +ENDPOINT_RE = re.compile(ur'(?uis)
.*?