From 0fe9a5a800f5d5b9deb44109ec4b47aa4e68c3ab Mon Sep 17 00:00:00 2001
From: evilhero
Date: Wed, 16 Jan 2019 14:32:37 -0500
Subject: [PATCH] IMP: Added DDL option to available download provider
 options.

---
 data/interfaces/default/config.html |   2 -
 mylar/getcomics.py                  | 274 ++++++++++++++++++++++++++++
 mylar/search.py                     |  27 ++-
 mylar/webserve.py                   |   2 +-
 4 files changed, 294 insertions(+), 11 deletions(-)
 create mode 100644 mylar/getcomics.py

diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html
index 1f649f42..6db9f27f 100755
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -763,13 +763,11 @@
 			Note: this is an experimental search - results may be better/worse.
-
 			Torrents

diff --git a/mylar/getcomics.py b/mylar/getcomics.py
new file mode 100644
index 00000000..ecf20b41
--- /dev/null
+++ b/mylar/getcomics.py
@@ -0,0 +1,274 @@
+# -*- coding: utf-8 -*-
+# This file is part of Mylar.
+#
+# Mylar is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Mylar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Mylar.  If not, see <http://www.gnu.org/licenses/>.
+
+from StringIO import StringIO
+import urllib
+from threading import Thread
+from Queue import Queue
+import os
+import sys
+import re
+import gzip
+import time
+import datetime
+import json
+from bs4 import BeautifulSoup
+import requests
+import cfscrape
+import mylar
+from mylar import logger
+
+class GC(object):
+
+    def __init__(self, query):
+
+        self.queue = Queue()
+
+        self.valreturn = []
+
+        self.url = 'https://getcomics.info'
+
+        self.query = query
+
+        self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")
+
+        self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}
+
+    def search(self):
+
+        with cfscrape.create_scraper() as s:
+            cf_cookievalue, cf_user_agent = s.get_tokens(self.url, headers=self.headers)
+
+            t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
+
+            with open(self.local_filename, 'wb') as f:
+                for chunk in t.iter_content(chunk_size=1024):
+                    if chunk: # filter out keep-alive new chunks
+                        f.write(chunk)
+                        f.flush()
+
+        return self.search_results()
+
+    def loadsite(self, title, link):
+        with cfscrape.create_scraper() as s:
+            self.cf_cookievalue, cf_user_agent = s.get_tokens(link, headers=self.headers)
+
+            t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
+
+            with open(title+'.html', 'wb') as f:
+                for chunk in t.iter_content(chunk_size=1024):
+                    if chunk: # filter out keep-alive new chunks
+                        f.write(chunk)
+                        f.flush()
+
+    def search_results(self):
+        results = {}
+        resultlist = []
+        soup = BeautifulSoup(open(self.local_filename), 'html.parser')
+
+        resultline = soup.find("span", {"class": "cover-article-count"}).get_text(strip=True)
+        logger.info('There are %s results' % re.sub('Articles', '', resultline).strip())
+
+        for f in soup.findAll("article"):
+            id = f['id']
+            lk = f.find('a')
+            link = lk['href']
+            titlefind = f.find("h1", {"class": "post-title"})
+            title = titlefind.get_text(strip=True)
+            option_find = f.find("p", {"style": "text-align: center;"})
+            i = 0
+            while i <= 2:
+                option_find = option_find.findNext(text=True)
+                if 'Year' in option_find:
+                    year = option_find.findNext(text=True)
+                    year = re.sub('\|', '', year).strip()
+                else:
+                    size = option_find.findNext(text=True)
+                    if 'MB' in size:
+                        size = re.sub('MB', 'M', size).strip()
+                    elif 'GB' in size:
+                        size = re.sub('GB', 'G', size).strip()
+                i+=1
+
+            dateline = f.find('time')
+            datefull = dateline['datetime']
+            datestamp = time.mktime(time.strptime(datefull, "%Y-%m-%d"))
+
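+            # build a provider-style result entry for this article; 'site' is hardcoded to 'DDL'
+            # so the downstream search code can tell these results apart from nzb/torrent providers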
+            resultlist.append({"title": title,
+                               "pubdate": datetime.datetime.fromtimestamp(float(datestamp)).strftime('%a, %d %b %Y %H:%M:%S'),
+                               "size": re.sub(' ', '', size).strip(),
+                               "link": link,
+                               "year": year,
+                               "id": re.sub('post-', '', id).strip(),
+                               "site": 'DDL'})
+
+            logger.fdebug('%s [%s]' % (title, size))
+
+        results['entries'] = resultlist
+
+        return results
+        #self.loadsite(title, link)
+        #self.parse_downloadresults(title)
+
+    def parse_downloadresults(self, title):
+
+        soup = BeautifulSoup(open(title+'.html'), 'html.parser')
+        orig_find = soup.find("p", {"style": "text-align: center;"})
+        i = 0
+        option_find = orig_find
+        while True: #i <= 10:
+            prev_option = option_find
+            option_find = option_find.findNext(text=True)
+            if i == 0:
+                series = option_find
+            elif 'Year' in option_find:
+                year = option_find.findNext(text=True)
+            else:
+                if 'Size' in prev_option:
+                    size = option_find #.findNext(text=True)
+                    possible_more = orig_find.next_sibling
+                    break
+            i+=1
+
+        logger.fdebug('%s [%s] / %s' % (series, year, size))
+
+        link = None
+        for f in soup.findAll("div", {"class": "aio-pulse"}):
+            lk = f.find('a')
+            if lk['title'] == 'Download Now':
+                link = lk['href']
+                site = lk['title']
+                break  #get the first link just to test
+
+        if link is None:
+            logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
+            return
+
+        links = []
+
+        if possible_more.name == 'ul':
+            bb = possible_more.findAll('li')
+            for x in bb:
+                volume = x.findNext(text=True)
+                if u'\u2013' in volume:
+                    volume = re.sub(u'\u2013', '-', volume)
+                linkline = x.find('a')
+                link = linkline['href']
+                site = linkline.findNext(text=True)
+                links.append({"volume": volume,
+                              "site": site,
+                              "link": link})
+        else:
+            check_extras = soup.findAll("h3")
+            for sb in check_extras:
+                header = sb.findNext(text=True)
+                if header == 'TPBs':
+                    nxt = sb.next_sibling
+                    if nxt.name == 'ul':
+                        bb = nxt.findAll('li')
+                        for x in bb:
+                            volume = x.findNext(text=True)
+                            if u'\u2013' in volume:
+                                volume = re.sub(u'\u2013', '-', volume)
+                            linkline = x.find('a')
+                            link = linkline['href']
+                            site = linkline.findNext(text=True)
+                            links.append({"volume": volume,
+                                          "site": site,
+                                          "link": link})
+
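+        # nothing usable was found in either the main download list or the TPB extras,
+        # so bail out before spawning the download thread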
+        if link is None:
+            logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
+            return
+
+        for x in links:
+            logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))
+
+        thread_ = Thread(target=self.downloadit, args=[link])
+        thread_.start()
+        thread_.join()
+        chk = self.queue.get()
+        while True:
+            if chk[0]['mode'] == 'stop':
+                return {"filename": chk[0]['filename'],
+                        "status": 'fail'}
+            elif chk[0]['mode'] == 'success':
+                try:
+                    if os.path.isfile(os.path.join(mylar.CONFIG.DDL_LOCATION, chk[0]['filename'])):
+                        logger.fdebug('Finished downloading %s [%s]' % (path, size))
+                except:
+                    pass
+                return {"filename": chk[0]['filename'],
+                        "status": 'success'}
+
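+    # worker-thread target: streams the selected link into DDL_LOCATION and reports
+    # success/failure back to parse_downloadresults through self.queue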
+    def downloadit(self, link):
+        filename = None
+        try:
+            t = requests.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
+
+            filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
+
+            path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)
+
+            if t.headers.get('content-encoding') == 'gzip': #.get('Content-Encoding') == 'gzip':
+                buf = StringIO(t.content)
+                f = gzip.GzipFile(fileobj=buf)
+
+            with open(path, 'wb') as f:
+                for chunk in t.iter_content(chunk_size=1024):
+                    if chunk: # filter out keep-alive new chunks
+                        f.write(chunk)
+                        f.flush()
+
+        except:
+            self.valreturn.append({"mode": "stop",
+                                   "filename": filename})
+            return self.queue.put(self.valreturn)
+
+        else:
+            self.valreturn.append({"mode": "success",
+                                   "filename": filename})
+            return self.queue.put(self.valreturn)
+
+    def issue_list(self, pack):
+        #packlist = [x.strip() for x in pack.split(',')]
+        packlist = pack.replace('+', ' ').replace(',', ' ').split()
+        print packlist
+        plist = []
+        pack_issues = []
+        for pl in packlist:
+            if '-' in pl:
+                plist.append(range(int(pl[:pl.find('-')]),int(pl[pl.find('-')+1:])+1))
+            else:
+                if 'TPBs' not in pl:
+                    plist.append(int(pl))
+                else:
+                    plist.append('TPBs')
+
+        for pi in plist:
+            if type(pi) == list:
+                for x in pi:
+                    pack_issues.append(x)
+            else:
+                pack_issues.append(pi)
+
+        pack_issues.sort()
+        print "pack_issues: %s" % pack_issues
+
+#if __name__ == '__main__':
+#    ab = GC(sys.argv[1]) #'justice league aquaman') #sys.argv[0])
+#    #c = ab.search()
+#    b = ab.loadsite('test', sys.argv[2])
+#    c = ab.parse_downloadresults('test', '60MB')
+#    #c = ab.issue_list(sys.argv[2])
diff --git a/mylar/search.py b/mylar/search.py
index d98c8fc4..5aeef710 100755
--- a/mylar/search.py
+++ b/mylar/search.py
@@ -16,7 +16,7 @@ from __future__ import division
 import mylar
-from mylar import logger, db, updater, helpers, parseit, findcomicfeed, notifiers, rsscheck, Failed, filechecker, auth32p, sabnzbd, nzbget, wwt #, getcomics
+from mylar import logger, db, updater, helpers, parseit, findcomicfeed, notifiers, rsscheck, Failed, filechecker, auth32p, sabnzbd, nzbget, wwt, getcomics
 import feedparser
 import requests
@@ -181,10 +181,14 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
     #fix for issue dates between Nov-Dec/(Jan-Feb-Mar)
     IssDt = str(IssueDate)[5:7]
-    if IssDt == "12" or IssDt == "11" or IssDt == "01" or IssDt == "02" or IssDt == "03":
+    if any([IssDt == "12", IssDt == "11", IssDt == "01", IssDt == "02", IssDt == "03"]):
         IssDateFix = IssDt
     else:
         IssDateFix = "no"
+        if StoreDate is not None:
+            StDt = str(StoreDate)[5:7]
+            if any([StDt == "10", StDt == "12", StDt == "11", StDt == "01", StDt == "02", StDt == "03"]):
+                IssDateFix = StDt
 
     searchcnt = 0
     srchloop = 1
@@ -615,9 +619,9 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
         if nzbprov == 'ddl':
             cmname = re.sub("%20", " ", str(comsrc))
             logger.fdebug('Sending request to DDL site for : %s %s' % (findcomic, isssearch))
-            #b = getcomics.GC(query=findcomic + ' ' + isssearch)
-            #bb = b.search()
-            logger.info('bb returned from DDL: %s' % bb)
+            b = getcomics.GC(query=findcomic + ' ' + isssearch)
+            bb = b.search()
+            #logger.info('bb returned from DDL: %s' % bb)
         elif RSS == "yes":
             if nzbprov == '32P' or nzbprov == 'Public Torrents':
                 cmname = re.sub("%20", " ", str(comsrc))
@@ -2293,9 +2297,16 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
             sendsite = ggc.loadsite(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid), link)
             ddl_it = ggc.parse_downloadresults(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid))
             logger.info("ddl status response: %s" % ddl_it)
-            if ddl_it[0]['status'] == 'success':
-                nzbname = ddl_it[0]['filename']
-                logger.info('Successfully retrieved %s from DDL site' % (nzbname))
+            if ddl_it['status'] == 'success':
+                nzbname = ddl_it['filename']
+                logger.info('Successfully retrieved %s from DDL site. Now submitting for post-processing...' % (nzbname))
+                mylar.PP_QUEUE.put({'nzb_name': nzbname,
+                                    'nzb_folder': mylar.CONFIG.DDL_LOCATION,
+                                    'issueid': IssueID,
+                                    'failed': False,
+                                    'comicid': ComicID,
+                                    'apicall': True})
+
             sent_to = "is downloading it directly via DDL"
 
     elif mylar.USE_BLACKHOLE and all([nzbprov != '32P', nzbprov != 'WWT', nzbprov != 'DEM', nzbprov != 'torznab']):
diff --git a/mylar/webserve.py b/mylar/webserve.py
index 3ed987cd..0952cf4e 100644
--- a/mylar/webserve.py
+++ b/mylar/webserve.py
@@ -5064,7 +5064,7 @@ class WebInterface(object):
                            'lowercase_filenames', 'autowant_upcoming', 'autowant_all', 'comic_cover_local', 'alternate_latest_series_covers', 'cvinfo', 'snatchedtorrent_notify',
                            'prowl_enabled', 'prowl_onsnatch', 'nma_enabled', 'nma_onsnatch', 'pushover_enabled', 'pushover_onsnatch', 'boxcar_enabled', 'boxcar_onsnatch',
                            'pushbullet_enabled', 'pushbullet_onsnatch', 'telegram_enabled', 'telegram_onsnatch', 'slack_enabled', 'slack_onsnatch',
-                           'opds_enable', 'opds_authentication', 'opds_metainfo'] #, 'enable_ddl']
+                           'opds_enable', 'opds_authentication', 'opds_metainfo', 'enable_ddl']
 
         for checked_config in checked_configs:
             if checked_config not in kwargs: