From 319d7f7f547a91d068299b04379d0c08a861f7e5 Mon Sep 17 00:00:00 2001
From: evilhero
Date: Thu, 17 Jan 2019 13:22:36 -0500
Subject: [PATCH] FIX: more general fixes for DDL option, IMP: Added DDL Queue
 so items hitting the DDL provider option will be queued in sequence for
 downloading & immediate post-processing thereafter

---
 mylar/__init__.py  | 40 +++++++++++++++++++-
 mylar/getcomics.py | 94 ++++++++++++++++++++++++----------------------
 mylar/helpers.py   | 34 ++++++++++++++++-
 mylar/search.py    | 16 ++------
 4 files changed, 124 insertions(+), 60 deletions(-)

diff --git a/mylar/__init__.py b/mylar/__init__.py
index cba0acc0..98c45076 100644
--- a/mylar/__init__.py
+++ b/mylar/__init__.py
@@ -123,10 +123,13 @@ USE_WATCHDIR = False
 SNPOOL = None
 NZBPOOL = None
 SEARCHPOOL = None
+PPPOOL = None
+DDLPOOL = None
 SNATCHED_QUEUE = Queue.Queue()
 NZB_QUEUE = Queue.Queue()
 PP_QUEUE = Queue.Queue()
 SEARCH_QUEUE = Queue.Queue()
+DDL_QUEUE = Queue.Queue()
 SEARCH_TIER_DATE = None
 COMICSORT = None
 PULLBYFILE = False
@@ -142,6 +145,7 @@ LOCAL_IP = None
 DOWNLOAD_APIKEY = None
 APILOCK = False
 SEARCHLOCK = False
+DDL_LOCK = False
 CMTAGGER_PATH = None
 STATIC_COMICRN_VERSION = "1.01"
 STATIC_APC_VERSION = "2.04"
@@ -162,11 +166,11 @@ def initialize(config_file):
     with INIT_LOCK:
 
         global CONFIG, _INITIALIZED, QUIET, CONFIG_FILE, OS_DETECT, MAINTENANCE, CURRENT_VERSION, LATEST_VERSION, COMMITS_BEHIND, INSTALL_TYPE, IMPORTLOCK, PULLBYFILE, INKDROPS_32P, \
-               DONATEBUTTON, CURRENT_WEEKNUMBER, CURRENT_YEAR, UMASK, USER_AGENT, SNATCHED_QUEUE, NZB_QUEUE, PP_QUEUE, SEARCH_QUEUE, PULLNEW, COMICSORT, WANTED_TAB_OFF, CV_HEADERS, \
+               DONATEBUTTON, CURRENT_WEEKNUMBER, CURRENT_YEAR, UMASK, USER_AGENT, SNATCHED_QUEUE, NZB_QUEUE, PP_QUEUE, SEARCH_QUEUE, DDL_QUEUE, PULLNEW, COMICSORT, WANTED_TAB_OFF, CV_HEADERS, \
                IMPORTBUTTON, IMPORT_FILES, IMPORT_TOTALFILES, IMPORT_CID_COUNT, IMPORT_PARSED_COUNT, IMPORT_FAILURE_COUNT, CHECKENABLED, CVURL, DEMURL, WWTURL, WWT_CF_COOKIEVALUE, \
                USE_SABNZBD, USE_NZBGET, USE_BLACKHOLE, USE_RTORRENT, USE_UTORRENT, USE_QBITTORRENT, USE_DELUGE, USE_TRANSMISSION, USE_WATCHDIR, SAB_PARAMS, \
                PROG_DIR, DATA_DIR, CMTAGGER_PATH, DOWNLOAD_APIKEY, LOCAL_IP, STATIC_COMICRN_VERSION, STATIC_APC_VERSION, KEYS_32P, AUTHKEY_32P, FEED_32P, FEEDINFO_32P, \
-               MONITOR_STATUS, SEARCH_STATUS, RSS_STATUS, WEEKLY_STATUS, VERSION_STATUS, UPDATER_STATUS, DBUPDATE_INTERVAL, LOG_LANG, LOG_CHARSET, APILOCK, SEARCHLOCK, LOG_LEVEL, \
+               MONITOR_STATUS, SEARCH_STATUS, RSS_STATUS, WEEKLY_STATUS, VERSION_STATUS, UPDATER_STATUS, DBUPDATE_INTERVAL, LOG_LANG, LOG_CHARSET, APILOCK, SEARCHLOCK, DDL_LOCK, LOG_LEVEL, \
                SCHED_RSS_LAST, SCHED_WEEKLY_LAST, SCHED_MONITOR_LAST, SCHED_SEARCH_LAST, SCHED_VERSION_LAST, SCHED_DBUPDATE_LAST, COMICINFO, SEARCH_TIER_DATE
 
         cc = mylar.config.Config(config_file)
@@ -367,6 +371,9 @@ def start():
             search_diff = datetime.datetime.utcfromtimestamp(helpers.utctimestamp() + ((int(CONFIG.SEARCH_INTERVAL) * 60) - (duration_diff*60)))
             logger.fdebug('[AUTO-SEARCH] Scheduling next run @ %s every %s minutes' % (search_diff, CONFIG.SEARCH_INTERVAL))
             SCHED.add_job(func=ss.run, id='search', name='Auto-Search', next_run_time=search_diff, trigger=IntervalTrigger(hours=0, minutes=CONFIG.SEARCH_INTERVAL, timezone='UTC'))
+        else:
+            ss = searchit.CurrentSearcher()
+            SCHED.add_job(func=ss.run, id='search', name='Auto-Search', next_run_time=None, trigger=IntervalTrigger(hours=0, minutes=CONFIG.SEARCH_INTERVAL, timezone='UTC'))
 
     if all([CONFIG.ENABLE_TORRENTS, CONFIG.AUTO_SNATCH, OS_DETECT != 'Windows']) and any([CONFIG.TORRENT_DOWNLOADER == 2, CONFIG.TORRENT_DOWNLOADER == 4]):
         logger.info('[AUTO-SNATCHER] Auto-Snatch of completed torrents enabled & attempting to background load....')
@@ -396,6 +403,12 @@ def start():
         PPPOOL.start()
         logger.info('[POST-PROCESS-QUEUE] Succesfully started Post-Processing Queuer....')
 
+    if CONFIG.ENABLE_DDL is True:
+        logger.info('[DDL-QUEUE] DDL Download queue enabled & monitoring for requests....')
+        DDLPOOL = threading.Thread(target=helpers.ddl_downloader, args=(DDL_QUEUE,), name="DDL-QUEUE")
+        DDLPOOL.start()
+        logger.info('[DDL-QUEUE] Successfully started DDL Download Queuer....')
+
     helpers.latestdate_fix()
 
     if CONFIG.ALT_PULL == 2:
@@ -1223,6 +1236,29 @@ def halt():
                 SEARCHPOOL.join(5)
             except AssertionError:
                 os._exit(0)
+
+        if PPPOOL is not None:
+            logger.info('Terminating the post-processing queue thread.')
+            try:
+                PPPOOL.join(10)
+                logger.info('Joined pool for termination - successful')
+            except KeyboardInterrupt:
+                PP_QUEUE.put('exit')
+                PPPOOL.join(5)
+            except AssertionError:
+                os._exit(0)
+
+        if DDLPOOL is not None:
+            logger.info('Terminating the DDL download queue thread.')
+            try:
+                DDLPOOL.join(10)
+                logger.info('Joined pool for termination - successful')
+            except KeyboardInterrupt:
+                DDL_QUEUE.put('exit')
+                DDLPOOL.join(5)
+            except AssertionError:
+                os._exit(0)
+
         _INITIALIZED = False
 
 def shutdown(restart=False, update=False, maintenance=False):
diff --git a/mylar/getcomics.py b/mylar/getcomics.py
index 59615723..052f2fb7 100644
--- a/mylar/getcomics.py
+++ b/mylar/getcomics.py
@@ -17,7 +17,6 @@
 from StringIO import StringIO
 import urllib
 from threading import Thread
-from Queue import Queue
 import os
 import sys
 import re
@@ -28,14 +27,12 @@ import json
 from bs4 import BeautifulSoup
 import requests
 import cfscrape
+import logger
 import mylar
-from mylar import logger
 
 class GC(object):
 
-    def __init__(self, query):
-
-        self.queue = Queue()
+    def __init__(self, query=None, issueid=None, comicid=None):
 
         self.valreturn = []
 
@@ -43,6 +40,10 @@ class GC(object):
 
         self.query = query
 
+        self.comicid = comicid
+
+        self.issueid = issueid
+
         self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")
 
         self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}
@@ -94,7 +95,7 @@ class GC(object):
                     option_find = option_find.findNext(text=True)
                     if 'Year' in option_find:
                         year = option_find.findNext(text=True)
-                        year = re.sub('|', '', year).strip()
+                        year = re.sub('\|', '', year).strip()
                     else:
                         size = option_find.findNext(text=True)
                         if 'MB' in size:
@@ -118,10 +119,8 @@ class GC(object):
         results['entries'] = resultlist
         return results
 
-        #self.loadsite(title, link)
-        #self.parse_downloadresults(title)
-
-    def parse_downloadresults(self, title):
+    def parse_downloadresults(self, title, mainlink):
 
         soup = BeautifulSoup(open(title+'.html'), 'html.parser')
         orig_find = soup.find("p", {"style": "text-align: center;"})
@@ -191,56 +190,61 @@ class GC(object):
 
         if link is None:
             logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
-            return
+            return {'success': False}
 
         for x in links:
             logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))
 
-        thread_ = Thread(target=self.downloadit, args=[link])
-        thread_.start()
-        thread_.join()
-        chk = self.queue.get()
-        while True:
-            if chk[0]['mode'] == 'stop':
-                return {"filename": chk[0]['filename'],
-                        "status": 'fail'}
-            elif chk[0]['mode'] == 'success':
-                try:
-                    if os.path.isfile(os.path.join(mylar.CONFIG.DDL_LOCATION, chk[0]['filename'])):
-                        logger.fdebug('Finished downloading %s [%s]' % (path, size))
-                except:
-                    pass
-                return {"filename": chk[0]['filename'],
-                        "status": 'success'}
+        mylar.DDL_QUEUE.put({'link': link,
+                             'mainlink': mainlink,
+                             'series': series,
+                             'year': year,
+                             'size': size,
+                             'comicid': self.comicid,
+                             'issueid': self.issueid})
+
+        return {'success': True}
+
+    def downloadit(self, link, mainlink):
+        if mylar.DDL_LOCK is True:
+            logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
+            return
+        else:
+            mylar.DDL_LOCK = True
 
-    def downloadit(self, link):
         filename = None
         try:
-            t = requests.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
+            with cfscrape.create_scraper() as s:
+                cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers)
+                t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
 
-            filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
+                filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
 
-            path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)
+                path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)
 
-            if t.headers.get('content-encoding') == 'gzip': #.get('Content-Encoding') == 'gzip':
-                buf = StringIO(t.content)
-                f = gzip.GzipFile(fileobj=buf)
+                if t.headers.get('content-encoding') == 'gzip': #.get('Content-Encoding') == 'gzip':
+                    buf = StringIO(t.content)
+                    f = gzip.GzipFile(fileobj=buf)
 
-            with open(path, 'wb') as f:
-                for chunk in t.iter_content(chunk_size=1024):
-                    if chunk: # filter out keep-alive new chunks
-                        f.write(chunk)
-                        f.flush()
+                with open(path, 'wb') as f:
+                    for chunk in t.iter_content(chunk_size=1024):
+                        if chunk: # filter out keep-alive new chunks
+                            f.write(chunk)
+                            f.flush()
 
-        except:
-            self.valreturn.append({"mode": "stop",
-                                   "filename": filename})
-            return self.queue.put(self.valreturn)
+        except Exception as e:
+            logger.error('[ERROR] %s' % e)
+            mylar.DDL_LOCK = False
+            return ({"success": False,
+                     "filename": filename,
+                     "path": None})
 
         else:
-            self.valreturn.append({"mode": "success",
-                                   "filename": filename})
-            return self.queue.put(self.valreturn)
+            mylar.DDL_LOCK = False
+            if os.path.isfile(path):
+                return ({"success": True,
+                         "filename": filename,
+                         "path": path})
 
     def issue_list(self, pack):
         #packlist = [x.strip() for x in pack.split(',)]
diff --git a/mylar/helpers.py b/mylar/helpers.py
index 95fb192e..ace20ec0 100755
--- a/mylar/helpers.py
+++ b/mylar/helpers.py
@@ -37,7 +37,7 @@ from apscheduler.triggers.interval import IntervalTrigger
 
 import mylar
 import logger
-from mylar import sabnzbd, nzbget, process
+from mylar import sabnzbd, nzbget, process, getcomics
 
 def multikeysort(items, columns):
@@ -3027,6 +3027,38 @@ def latestdate_update():
         logger.info('updating latest date for : ' + a['ComicID'] + ' to ' + a['LatestDate'] + ' #' + a['LatestIssue'])
         myDB.upsert("comics", newVal, ctrlVal)
 
+def ddl_downloader(queue):
+    while True:
+        if mylar.DDL_LOCK is True:
+            time.sleep(5)
+
+        elif mylar.DDL_LOCK is False and queue.qsize() >= 1:
+            item = queue.get(True)
+            if item == 'exit':
+                logger.info('Cleaning up workers for shutdown')
+                break
+            logger.info('Now loading request from DDL queue: %s' % item['series'])
+
+            ddz = getcomics.GC()
+            ddzstat = ddz.downloadit(item['link'], item['mainlink'])
+
+            if all([ddzstat['success'] is True, mylar.CONFIG.POST_PROCESSING is True]):
+                logger.info('%s successfully downloaded - now initiating post-processing.' % (ddzstat['filename']))
+                try:
+                    mylar.PP_QUEUE.put({'nzb_name': ddzstat['filename'],
+                                        'nzb_folder': ddzstat['path'],
+                                        'failed': False,
+                                        'issueid': item['issueid'],
+                                        'comicid': item['comicid'],
+                                        'apicall': True,
+                                        'ddl': True})
+                except Exception as e:
+                    logger.info('process error: %s [%s]' % (e, ddzstat))
+            elif ddzstat['success'] is True:
+                logger.info('File successfully downloaded. Post Processing is not enabled - item retained here: %s' % ddzstat['path'])
+            else:
+                logger.info('[Status: %s] Failed to download: %s ' % (ddzstat['success'], ddzstat))
+
 def postprocess_main(queue):
     while True:
         if mylar.APILOCK is True:
diff --git a/mylar/search.py b/mylar/search.py
index d825cff4..5d8e2d81 100755
--- a/mylar/search.py
+++ b/mylar/search.py
@@ -2298,20 +2298,12 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
     sent_to = None
     t_hash = None
     if mylar.CONFIG.ENABLE_DDL is True and nzbprov == 'ddl':
-        ggc = getcomics.GC('nope')
+        ggc = getcomics.GC(issueid=IssueID, comicid=ComicID)
         sendsite = ggc.loadsite(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid), link)
-        ddl_it = ggc.parse_downloadresults(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid))
+        ddl_it = ggc.parse_downloadresults(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid), link)
         logger.info("ddl status response: %s" % ddl_it)
-        if ddl_it['status'] == 'success':
-            nzbname = ddl_it['filename']
-            logger.info('Successfully retrieved %s from DDL site. Now submitting for post-processing...' % (nzbname))
-            mylar.PP_QUEUE.put({'nzb_name': nzbname,
-                                'nzb_folder': mylar.CONFIG.DDL_LOCATION,
-                                'issueid': IssueID,
-                                'failed': False,
-                                'comicid': ComicID,
-                                'apicall': True,
-                                'ddl': True})
+        if ddl_it['success'] is True:
+            logger.info('Successfully snatched %s from DDL site. It is currently being queued for download in position %s.' % (nzbname, mylar.DDL_QUEUE.qsize()))
         else:
             logger.info('Failed to retrieve %s from the DDL site.' % (nzbname))
             return "ddl-fail"
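
For context, the moving pieces this patch wires together (DDL_QUEUE, the ddl_downloader() worker thread, the DDL_LOCK flag, and the 'exit' sentinel used by halt()) form a standard single-consumer producer/consumer pattern. A minimal standalone sketch of that pattern, assuming only what the patch shows: names like ddl_worker and fake_download are hypothetical stand-ins for Mylar's actual code, and it uses Python 3's queue module in place of the codebase's Python 2 Queue.

    import queue
    import threading
    import time

    DDL_QUEUE = queue.Queue()
    DDL_LOCK = False          # mirrors mylar.DDL_LOCK: only one download at a time

    def fake_download(item):
        # stand-in for GC.downloadit(): returns a status dict shaped like the patch's
        time.sleep(0.1)
        return {'success': True, 'filename': '%s.cbz' % item['series'], 'path': '/tmp'}

    def ddl_worker(q):
        global DDL_LOCK
        while True:
            item = q.get(True)            # block until a request is queued
            if item == 'exit':            # sentinel string shuts the worker down
                break
            DDL_LOCK = True               # serialize downloads
            try:
                status = fake_download(item)
            finally:
                DDL_LOCK = False          # always release, even on failure
            if status['success']:
                # the patch hands off to PP_QUEUE here for post-processing
                print('downloaded %s' % status['filename'])

    worker = threading.Thread(target=ddl_worker, args=(DDL_QUEUE,), name='DDL-QUEUE')
    worker.start()
    DDL_QUEUE.put({'series': 'Example Comic', 'link': 'https://example.invalid/dl'})
    DDL_QUEUE.put('exit')
    worker.join()

Design note: the patch's worker polls queue.qsize() and sleeps in 5-second intervals while DDL_LOCK is held; since the worker thread is the queue's only consumer, a blocking get() as in the sketch serializes downloads the same way without the busy-wait.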