IMP: Added DDL option to available download provider options.

This commit is contained in:
evilhero 2019-01-16 14:32:37 -05:00
parent ba011cc659
commit 0fe9a5a800
4 changed files with 294 additions and 11 deletions

View File

@@ -763,13 +763,11 @@
<small class="heading"><span style="float: left; margin-right: .3em; margin-top: 4px;" class="ui-icon ui-icon-info"></span>Note: this is an experimental search - results may be better/worse.</small>
</div>
</fieldset>
-<!--
<fieldset>
<div class="row checkbox left clearfix">
<input type="checkbox" id="enable_ddl" name="enable_ddl" value=1 ${config['enable_ddl']} /><legend>Enable DDL (GetComics)</legend>
</div>
</fieldset>
--->
<fieldset>
<div class="row checkbox left clearfix">
<input id="enable_torrent_search" type="checkbox" onclick="initConfigCheckbox($(this));" name="enable_torrent_search" value=1 ${config['enable_torrent_search']} /><legend>Torrents</legend>

mylar/getcomics.py Normal file (274 additions)
View File

@@ -0,0 +1,274 @@
# -*- coding: utf-8 -*-
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
from StringIO import StringIO
import urllib
from threading import Thread
from Queue import Queue
import os
import sys
import re
import gzip
import time
import datetime
import json
from bs4 import BeautifulSoup
import requests
import cfscrape
import mylar
from mylar import logger
class GC(object):

    def __init__(self, query):
        self.queue = Queue()
        self.valreturn = []
        self.url = 'https://getcomics.info'
        self.query = query
        self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")
        self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}

    def search(self):
        with cfscrape.create_scraper() as s:
            cf_cookievalue, cf_user_agent = s.get_tokens(self.url, headers=self.headers)
            t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)

            with open(self.local_filename, 'wb') as f:
                for chunk in t.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
                        f.flush()

        return self.search_results()

    def loadsite(self, title, link):
        with cfscrape.create_scraper() as s:
            self.cf_cookievalue, cf_user_agent = s.get_tokens(link, headers=self.headers)
            t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)

            with open(title+'.html', 'wb') as f:
                for chunk in t.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
                        f.flush()
    def search_results(self):
        # parse the locally cached getcomics search page and build the result entries
        results = {}
        resultlist = []
        soup = BeautifulSoup(open(self.local_filename), 'html.parser')

        resultline = soup.find("span", {"class": "cover-article-count"}).get_text(strip=True)
        logger.info('There are %s results' % re.sub('Articles', '', resultline).strip())

        for f in soup.findAll("article"):
            id = f['id']
            lk = f.find('a')
            link = lk['href']
            titlefind = f.find("h1", {"class": "post-title"})
            title = titlefind.get_text(strip=True)

            option_find = f.find("p", {"style": "text-align: center;"})
            i = 0
            while i <= 2:
                option_find = option_find.findNext(text=True)
                if 'Year' in option_find:
                    year = option_find.findNext(text=True)
                    year = re.sub('\|', '', year).strip()
                else:
                    size = option_find.findNext(text=True)
                    if 'MB' in size:
                        size = re.sub('MB', 'M', size).strip()
                    elif 'GB' in size:
                        size = re.sub('GB', 'G', size).strip()
                i+=1

            dateline = f.find('time')
            datefull = dateline['datetime']
            datestamp = time.mktime(time.strptime(datefull, "%Y-%m-%d"))

            resultlist.append({"title": title,
                               "pubdate": datetime.datetime.fromtimestamp(float(datestamp)).strftime('%a, %d %b %Y %H:%M:%S'),
                               "size": re.sub(' ', '', size).strip(),
                               "link": link,
                               "year": year,
                               "id": re.sub('post-', '', id).strip(),
                               "site": 'DDL'})

            logger.fdebug('%s [%s]' % (title, size))

        results['entries'] = resultlist
        return results

        #self.loadsite(title, link)
        #self.parse_downloadresults(title)
    def parse_downloadresults(self, title):
        # read series/year/size from the cached article page, collect the download links,
        # then hand a single link off to downloadit() on a worker thread and wait on the queue
        soup = BeautifulSoup(open(title+'.html'), 'html.parser')
        orig_find = soup.find("p", {"style": "text-align: center;"})
        i = 0
        option_find = orig_find
        while True: #i <= 10:
            prev_option = option_find
            option_find = option_find.findNext(text=True)
            if i == 0:
                series = option_find
            elif 'Year' in option_find:
                year = option_find.findNext(text=True)
            else:
                if 'Size' in prev_option:
                    size = option_find #.findNext(text=True)
                    possible_more = orig_find.next_sibling
                    break
            i+=1

        logger.fdebug('%s [%s] / %s' % (series, year, size))

        link = None
        for f in soup.findAll("div", {"class": "aio-pulse"}):
            lk = f.find('a')
            if lk['title'] == 'Download Now':
                link = lk['href']
                site = lk['title']
                break #get the first link just to test

        if link is None:
            logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
            return

        links = []
        if possible_more.name == 'ul':
            bb = possible_more.findAll('li')
            for x in bb:
                volume = x.findNext(text=True)
                if u'\u2013' in volume:
                    volume = re.sub(u'\u2013', '-', volume)
                linkline = x.find('a')
                link = linkline['href']
                site = linkline.findNext(text=True)
                links.append({"volume": volume,
                              "site": site,
                              "link": link})
        else:
            check_extras = soup.findAll("h3")
            for sb in check_extras:
                header = sb.findNext(text=True)
                if header == 'TPBs':
                    nxt = sb.next_sibling
                    if nxt.name == 'ul':
                        bb = nxt.findAll('li')
                        for x in bb:
                            volume = x.findNext(text=True)
                            if u'\u2013' in volume:
                                volume = re.sub(u'\u2013', '-', volume)
                            linkline = x.find('a')
                            link = linkline['href']
                            site = linkline.findNext(text=True)
                            links.append({"volume": volume,
                                          "site": site,
                                          "link": link})

        if link is None:
            logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
            return

        for x in links:
            logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))

        thread_ = Thread(target=self.downloadit, args=[link])
        thread_.start()
        thread_.join()
        chk = self.queue.get()
        while True:
            if chk[0]['mode'] == 'stop':
                return {"filename": chk[0]['filename'],
                        "status": 'fail'}
            elif chk[0]['mode'] == 'success':
                try:
                    if os.path.isfile(os.path.join(mylar.CONFIG.DDL_LOCATION, chk[0]['filename'])):
                        logger.fdebug('Finished downloading %s [%s]' % (chk[0]['filename'], size))
                except:
                    pass
                return {"filename": chk[0]['filename'],
                        "status": 'success'}
    def downloadit(self, link):
        # stream the remote file into DDL_LOCATION and report the outcome back via the queue
        filename = None
        try:
            t = requests.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
            filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
            path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)

            if t.headers.get('content-encoding') == 'gzip': #.get('Content-Encoding') == 'gzip':
                buf = StringIO(t.content)
                f = gzip.GzipFile(fileobj=buf)

            with open(path, 'wb') as f:
                for chunk in t.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
                        f.flush()
        except:
            self.valreturn.append({"mode": "stop",
                                   "filename": filename})
            return self.queue.put(self.valreturn)
        else:
            self.valreturn.append({"mode": "success",
                                   "filename": filename})
            return self.queue.put(self.valreturn)
    def issue_list(self, pack):
        #packlist = [x.strip() for x in pack.split(',)]
        packlist = pack.replace('+', ' ').replace(',', ' ').split()
        print packlist
        plist = []
        pack_issues = []
        for pl in packlist:
            if '-' in pl:
                plist.append(range(int(pl[:pl.find('-')]),int(pl[pl.find('-')+1:])+1))
            else:
                if 'TPBs' not in pl:
                    plist.append(int(pl))
                else:
                    plist.append('TPBs')

        for pi in plist:
            if type(pi) == list:
                for x in pi:
                    pack_issues.append(x)
            else:
                pack_issues.append(pi)

        pack_issues.sort()
        print "pack_issues: %s" % pack_issues
#if __name__ == '__main__':
# ab = GC(sys.argv[1]) #'justice league aquaman') #sys.argv[0])
# #c = ab.search()
# b = ab.loadsite('test', sys.argv[2])
# c = ab.parse_downloadresults('test', '60MB')
# #c = ab.issue_list(sys.argv[2])
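As a point of reference (not part of the commit), here is a minimal standalone sketch of how the new module could be exercised, reconstructed from the commented-out block above. The query string and the 'test' cache title are placeholders, parse_downloadresults() takes only the cached page title in this revision, and the sketch assumes Mylar's configuration (CACHE_DIR, DDL_LOCATION) has already been initialized:

# illustrative smoke test only -- not shipped with this commit
import sys

from mylar import getcomics

if __name__ == '__main__':
    gc = getcomics.GC(sys.argv[1])          # e.g. 'justice league aquaman'
    results = gc.search()                   # caches getcomics.html under CACHE_DIR
    for entry in results['entries']:
        print '%s (%s) [%s] %s' % (entry['title'], entry['year'], entry['size'], entry['link'])

    if results['entries']:
        first = results['entries'][0]
        gc.loadsite('test', first['link'])      # writes test.html to the current working directory
        print gc.parse_downloadresults('test')  # {'filename': ..., 'status': ...} or None on failure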

View File

@@ -16,7 +16,7 @@
from __future__ import division
import mylar
-from mylar import logger, db, updater, helpers, parseit, findcomicfeed, notifiers, rsscheck, Failed, filechecker, auth32p, sabnzbd, nzbget, wwt #, getcomics
+from mylar import logger, db, updater, helpers, parseit, findcomicfeed, notifiers, rsscheck, Failed, filechecker, auth32p, sabnzbd, nzbget, wwt, getcomics
import feedparser
import requests
@@ -181,10 +181,14 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
#fix for issue dates between Nov-Dec/(Jan-Feb-Mar)
IssDt = str(IssueDate)[5:7]
-if IssDt == "12" or IssDt == "11" or IssDt == "01" or IssDt == "02" or IssDt == "03":
+if any([IssDt == "12", IssDt == "11", IssDt == "01", IssDt == "02", IssDt == "03"]):
IssDateFix = IssDt
else:
IssDateFix = "no"
+if StoreDate is not None:
+StDt = str(StoreDate)[5:7]
+if any([StDt == "10", StDt == "12", StDt == "11", StDt == "01", StDt == "02", StDt == "03"]):
+IssDateFix = StDt
searchcnt = 0
srchloop = 1
@@ -615,9 +619,9 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
if nzbprov == 'ddl':
cmname = re.sub("%20", " ", str(comsrc))
logger.fdebug('Sending request to DDL site for : %s %s' % (findcomic, isssearch))
-#b = getcomics.GC(query=findcomic + ' ' + isssearch)
-#bb = b.search()
-logger.info('bb returned from DDL: %s' % bb)
+b = getcomics.GC(query=findcomic + ' ' + isssearch)
+bb = b.search()
+#logger.info('bb returned from DDL: %s' % bb)
elif RSS == "yes":
if nzbprov == '32P' or nzbprov == 'Public Torrents':
cmname = re.sub("%20", " ", str(comsrc))
@@ -2293,9 +2297,16 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
sendsite = ggc.loadsite(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid), link)
ddl_it = ggc.parse_downloadresults(os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + nzbid))
logger.info("ddl status response: %s" % ddl_it)
-if ddl_it[0]['status'] == 'success':
-nzbname = ddl_it[0]['filename']
-logger.info('Successfully retrieved %s from DDL site' % (nzbname))
+if ddl_it['status'] == 'success':
+nzbname = ddl_it['filename']
+logger.info('Successfully retrieved %s from DDL site. Now submitting for post-processing...' % (nzbname))
+mylar.PP_QUEUE.put({'nzb_name': nzbname,
+'nzb_folder': mylar.CONFIG.DDL_LOCATION,
+'issueid': IssueID,
+'failed': False,
+'comicid': ComicID,
+'apicall': True})
sent_to = "is downloading it directly via DDL"
elif mylar.USE_BLACKHOLE and all([nzbprov != '32P', nzbprov != 'WWT', nzbprov != 'DEM', nzbprov != 'torznab']):

View File

@@ -5064,7 +5064,7 @@ class WebInterface(object):
'lowercase_filenames', 'autowant_upcoming', 'autowant_all', 'comic_cover_local', 'alternate_latest_series_covers', 'cvinfo', 'snatchedtorrent_notify',
'prowl_enabled', 'prowl_onsnatch', 'nma_enabled', 'nma_onsnatch', 'pushover_enabled', 'pushover_onsnatch', 'boxcar_enabled',
'boxcar_onsnatch', 'pushbullet_enabled', 'pushbullet_onsnatch', 'telegram_enabled', 'telegram_onsnatch', 'slack_enabled', 'slack_onsnatch',
-'opds_enable', 'opds_authentication', 'opds_metainfo'] #, 'enable_ddl']
+'opds_enable', 'opds_authentication', 'opds_metainfo', 'enable_ddl']
for checked_config in checked_configs:
if checked_config not in kwargs:
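For context, the loop this hunk feeds (truncated above) relies on the usual HTML-form convention: a checkbox left unchecked is simply absent from the submitted kwargs, so any name listed in checked_configs that is missing from the POST gets coerced to a disabled value before the settings are saved, which is why enable_ddl is added to that list. A small illustrative sketch of the pattern, with hypothetical names, not Mylar's actual handler:

# illustrative only -- hypothetical helper, not taken from webserve.py
def normalize_checkboxes(kwargs, checked_configs):
    # browsers omit unchecked checkboxes from the POST body, so anything
    # missing from kwargs is treated as explicitly switched off
    for checked_config in checked_configs:
        if checked_config not in kwargs:
            kwargs[checked_config] = 0
        else:
            kwargs[checked_config] = 1
    return kwargs

print normalize_checkboxes({'enable_ddl': '1'}, ['enable_ddl', 'opds_enable'])
# enable_ddl -> 1, opds_enable -> 0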