diff --git a/.gitignore b/.gitignore index e7310d676..e17044bdb 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ cachefile.dbm bazarr.pid /venv /data +/.vscode # Allow !*.dll \ No newline at end of file diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index d345b79ff..577518c6e 100644 --- a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = { }, "titulky": { DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") + }, + "legendasdivx": { + TooManyRequests: (datetime.timedelta(hours=2), "2 hours"), + DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"), + ParseResponseError: (datetime.timedelta(hours=1), "1 hours"), } } diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 6247792af..e9f505bdf 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -2,20 +2,22 @@ from __future__ import absolute_import import logging import io +import re import os import rarfile import zipfile from requests import Session from guessit import guessit -from subliminal_patch.exceptions import ParseResponseError +from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded from subliminal_patch.providers import Provider from subliminal.providers import ParserBeautifulSoup from subliminal_patch.subtitle import Subtitle from subliminal.video import Episode, Movie -from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches +from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches from subzero.language import Language from subliminal_patch.score import get_scores +from subliminal.utils import sanitize, sanitize_release_group logger = logging.getLogger(__name__) @@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle): super(LegendasdivxSubtitle, self).__init__(language) self.language = language self.page_link = data['link'] - self.hits=data['hits'] - self.exact_match=data['exact_match'] - self.description=data['description'].lower() + self.hits = data['hits'] + self.exact_match = data['exact_match'] + self.description = data['description'] self.video = video - self.videoname =data['videoname'] + self.video_filename = data['video_filename'] + self.uploader = data['uploader'] @property def id(self): @@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle): def get_matches(self, video): matches = set() - if self.videoname.lower() in self.description: + description = sanitize(self.description) + + if sanitize(self.video_filename) in description: matches.update(['title']) matches.update(['season']) matches.update(['episode']) # episode - if video.title and video.title.lower() in self.description: + if video.title and sanitize(video.title) in description: matches.update(['title']) - if video.year and '{:04d}'.format(video.year) in self.description: + if video.year and '{:04d}'.format(video.year) in description: matches.update(['year']) if isinstance(video, Episode): # already matched in search query - if video.season and 's{:02d}'.format(video.season) in self.description: + if video.season and 's{:02d}'.format(video.season) in description: matches.update(['season']) - if video.episode and 'e{:02d}'.format(video.episode) in self.description: + if video.episode and 'e{:02d}'.format(video.episode) in description: matches.update(['episode']) if video.episode and video.season and video.series: - if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description: - matches.update(['series']) - matches.update(['season']) - matches.update(['episode']) - if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description: + if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description: matches.update(['series']) matches.update(['season']) matches.update(['episode']) # release_group - if video.release_group and video.release_group.lower() in self.description: + if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description): matches.update(['release_group']) # resolution - - if video.resolution and video.resolution.lower() in self.description: + if video.resolution and video.resolution.lower() in description: matches.update(['resolution']) # format @@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle): if formats[0] == "web-dl": formats.append("webdl") formats.append("webrip") - formats.append("web ") + formats.append("web") for frmt in formats: - if frmt.lower() in self.description: + if frmt in description: matches.update(['format']) break @@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle): if video.video_codec: video_codecs = [video.video_codec.lower()] if video_codecs[0] == "h264": - formats.append("x264") + video_codecs.append("x264") elif video_codecs[0] == "h265": - formats.append("x265") - for vc in formats: - if vc.lower() in self.description: + video_codecs.append("x265") + for vc in video_codecs: + if vc in description: matches.update(['video_codec']) break @@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle): # matches |= guess_matches(video, guessit(self.description)) return matches - - - class LegendasdivxProvider(Provider): """Legendasdivx Provider.""" languages = {Language('por', 'BR')} | {Language('por')} @@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider): 'Cache-Control': 'no-cache' } loginpage = site + '/forum/ucp.php?mode=login' + logoutpage = site + '/sair.php' searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}' - language_list = list(languages) + download_link = site + '/modules.php{link}' def __init__(self, username, password): + # make sure login credentials are configured. + if any((username, password)) and not all((username, password)): + raise ConfigurationError('Username and password must be specified') self.username = username self.password = password + self.logged_in = False def initialize(self): self.session = Session() + self.session.headers.update(self.headers) self.login() def terminate(self): @@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider): def login(self): logger.info('Logging in') - self.headers['Referer'] = self.site + '/index.php' - self.session.headers.update(self.headers.items()) + res = self.session.get(self.loginpage) bsoup = ParserBeautifulSoup(res.content, ['lxml']) - + _allinputs = bsoup.findAll('input') - fields = {} + data = {} + # necessary to set 'sid' for POST request for field in _allinputs: - fields[field.get('name')] = field.get('value') + data[field.get('name')] = field.get('value') + + data['username'] = self.username + data['password'] = self.password - fields['username'] = self.username - fields['password'] = self.password - fields['autologin'] = 'on' - fields['viewonline'] = 'on' - - self.headers['Referer'] = self.loginpage - self.session.headers.update(self.headers.items()) - res = self.session.post(self.loginpage, fields) + res = self.session.post(self.loginpage, data) + res.raise_for_status() + try: - logger.debug('Got session id %s' % + logger.debug('Logged in successfully: PHPSESSID: %s' % self.session.cookies.get_dict()['PHPSESSID']) - except KeyError as e: - logger.error(repr(e)) - logger.error("Didn't get session id, check your credentials") - return False + self.logged_in = True + except KeyError: + logger.error("Couldn't retrieve session ID, check your credentials") + raise AuthenticationError("Please check your credentials.") except Exception as e: - logger.error(repr(e)) - logger.error('uncached error #legendasdivx #AA') - return False - - return True + if 'bloqueado' in res.text.lower(): # blocked IP address + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) + logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) def logout(self): - # need to figure this out - return True + if self.logged_in: + logger.info('Legendasdivx:: Logging out') + r = self.session.get(self.logoutpage, timeout=10) + r.raise_for_status() + logger.debug('Legendasdivx :: Logged out') + self.logged_in = False + + def _process_page(self, video, bsoup, video_filename): - def _process_page(self, video, bsoup, querytext, videoname): subtitles = [] + _allsubs = bsoup.findAll("div", {"class": "sub_box"}) - lang = Language.fromopensubtitles("pob") + for _subbox in _allsubs: - hits=0 + hits = 0 for th in _subbox.findAll("th", {"class": "color2"}): if th.string == 'Hits:': hits = int(th.parent.find("td").string) if th.string == 'Idioma:': - lang = th.parent.find("td").find ("img").get ('src') - if 'brazil' in lang: + lang = th.parent.find("td").find("img").get('src') + if 'brazil' in lang.lower(): lang = Language.fromopensubtitles('pob') - else: + elif 'portugal' in lang.lower(): lang = Language.fromopensubtitles('por') - - description = _subbox.find("td", {"class": "td_desc brd_up"}) + else: + continue + # get description for matches + description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() + #get subtitle link download = _subbox.find("a", {"class": "sub_download"}) - try: - # sometimes BSoup just doesn't get the link - logger.debug(download.get('href')) - except Exception as e: - logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext)) - continue + + # sometimes BSoup can't find 'a' tag and returns None. + i = 0 + while not (download): # must get it... trying again... + download = _subbox.find("a", {"class": "sub_download"}) + i=+1 + logger.debug("Try number {0} try!".format(str(i))) + dl = download.get('href') + logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl)) + + # get subtitle uploader + sub_header = _subbox.find("div", {"class" :"sub_header"}) + uploader = sub_header.find("a").text if sub_header else 'anonymous' exact_match = False - if video.name.lower() in description.get_text().lower(): + if video.name.lower() in description.lower(): exact_match = True data = {'link': self.site + '/modules.php' + download.get('href'), 'exact_match': exact_match, 'hits': hits, - 'videoname': videoname, - 'description': description.get_text() } + 'uploader': uploader, + 'video_filename': video_filename, + 'description': description + } subtitles.append( LegendasdivxSubtitle(lang, video, data) ) return subtitles - def query(self, video, language): - try: - logger.debug('Got session id %s' % - self.session.cookies.get_dict()['PHPSESSID']) - except Exception as e: - self.login() + def query(self, video, languages): - language_ids = '0' - if isinstance(language, (tuple, list, set)): - if len(language) == 1: - language_ids = ','.join(sorted(l.opensubtitles for l in language)) - if language_ids == 'por': - language_ids = '&form_cat=28' - else: - language_ids = '&form_cat=29' + video_filename = video.name + video_filename = os.path.basename(video_filename) + video_filename, _ = os.path.splitext(video_filename) + video_filename = sanitize_release_group(video_filename) - videoname = video.name - videoname = os.path.basename(videoname) - videoname, _ = os.path.splitext(videoname) - # querytext = videoname.lower() _searchurl = self.searchurl if video.imdb_id is None: if isinstance(video, Episode): @@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider): else: querytext = video.imdb_id + # language query filter + if isinstance(languages, (tuple, list, set)): + language_ids = ','.join(sorted(l.opensubtitles for l in languages)) + if 'por' in language_ids: # prioritize portuguese subtitles + lang_filter = '&form_cat=28' # pt + elif 'pob' in language_ids: + lang_filter = '&form_cat=29' # br + else: + lang_filter = '' + + querytext = querytext + lang_filter if lang_filter else querytext - # querytext = querytext.replace( - # ".", "+").replace("[", "").replace("]", "") - if language_ids != '0': - querytext = querytext + language_ids self.headers['Referer'] = self.site + '/index.php' self.session.headers.update(self.headers.items()) res = self.session.get(_searchurl.format(query=querytext)) - # form_cat=28 = br - # form_cat=29 = pt + if "A legenda não foi encontrada" in res.text: logger.warning('%s not found', querytext) return [] bsoup = ParserBeautifulSoup(res.content, ['html.parser']) - subtitles = self._process_page(video, bsoup, querytext, videoname) + subtitles = self._process_page(video, bsoup, video_filename) + + # search for more than 10 results (legendasdivx uses pagination) + # don't throttle - maximum results = 6 * 10 + MAX_PAGES = 6 + + #get number of pages bases on results found + page_header = bsoup.find("div", {"class": "pager_bar"}) + results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) + num_pages = (int(results_found) // 10) + 1 + num_pages = min(MAX_PAGES, num_pages) + + if num_pages > 1: + for num_page in range(2, num_pages+2): + _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page)) + logger.debug("Moving to next page: %s" % _search_next) + res = self.session.get(_search_next) + next_page = ParserBeautifulSoup(res.content, ['html.parser']) + subs = self._process_page(video, next_page, video_filename) + subtitles.extend(subs) return subtitles @@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider): def download_subtitle(self, subtitle): res = self.session.get(subtitle.page_link) + res.raise_for_status() if res: - if res.text == '500': - raise ValueError('Error 500 on server') + if res.status_code in ['500', '503']: + raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") + elif 'limite' in res.text.lower(): # daily downloads limit reached + raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") + elif 'bloqueado' in res.text.lower(): # blocked IP address + raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) archive = self._get_archive(res.content) # extract the subtitle @@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider): subtitle.normalize() return subtitle - raise ValueError('Problems conecting to the server') + + logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code) + return def _get_archive(self, content): # open the archive @@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider): logger.debug('Identified zip archive') archive = zipfile.ZipFile(archive_stream) else: - # raise ParseResponseError('Unsupported compressed format') raise Exception('Unsupported compressed format') return archive @@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider): _tmp.remove('.txt') _subtitle_extensions = tuple(_tmp) _max_score = 0 - _scores = get_scores (subtitle.video) + _scores = get_scores(subtitle.video) for name in archive.namelist(): # discard hidden files @@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider): logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score)) return archive.read(_max_name) - raise ParseResponseError('Can not find the subtitle in the compressed file') + raise ValueError("No subtitle found on compressed file. Max score was 0") \ No newline at end of file