FIX: DDL provider option attempting to use incorrect links when downloading, FIX: Fixed some DDL problems due to various parsing / type problems, IMP: DDL provider will now follow the RSS feed option if it is enabled

evilhero 2019-03-08 16:56:05 -05:00
parent ef278eac21
commit c06e96c129
4 changed files with 220 additions and 53 deletions

View File

@@ -140,7 +140,7 @@ class GC(object):
             nwsize = size.find('//')
             size = re.sub('\[', '', size[:nwsize]).strip()
         else:
-            size = '0 M'
+            size = '0M'
         i+=1
         dateline = f.find('time')
         datefull = dateline['datetime']
@@ -163,15 +163,19 @@ class GC(object):
     def parse_downloadresults(self, id, mainlink):
         myDB = db.DBConnection()
+        series = None
+        year = None
+        size = None
         title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
         soup = BeautifulSoup(open(title+'.html'), 'html.parser')
         orig_find = soup.find("p", {"style": "text-align: center;"})
         i = 0
         option_find = orig_find
+        possible_more = None
         while True: #i <= 10:
             prev_option = option_find
             option_find = option_find.findNext(text=True)
-            if i == 0:
+            if i == 0 and series is None:
                 series = option_find
             elif 'Year' in option_find:
                 year = option_find.findNext(text=True)
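
The series/year/size defaults added above get filled in by walking the text nodes of the centered paragraph with findNext(text=True). A minimal, self-contained sketch of that walk, against hypothetical markup rather than a live GetComics page:

    from bs4 import BeautifulSoup

    # Hypothetical markup standing in for the centered info block of a post page;
    # real pages may be laid out differently.
    html = ('<p style="text-align: center;"><strong>Invincible Vol. 1</strong><br/>'
            'Year : <strong>2018</strong> | Size : <strong>150 MB</strong></p>')
    soup = BeautifulSoup(html, 'html.parser')
    orig_find = soup.find("p", {"style": "text-align: center;"})

    series = year = size = None
    option_find = orig_find
    while True:
        prev_option = option_find
        option_find = option_find.findNext(text=True)
        if option_find is None:
            break
        if series is None:
            series = option_find                    # first text node is the title
        elif 'Year' in option_find:
            year = option_find.findNext(text=True)  # the value follows the label
        elif 'Size' in prev_option:
            size = option_find
            break

    print('%s | %s | %s' % (series, year, size))    # Invincible Vol. 1 | 2018 | 150 MB
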
@@ -189,24 +193,52 @@ class GC(object):
         for f in soup.findAll("div", {"class": "aio-pulse"}):
             lk = f.find('a')
             if lk['title'] == 'Download Now':
-                link = lk['href']
-                site = lk['title']
+                link = {"series": series,
+                        "site": lk['title'],
+                        "year": year,
+                        "issues": None,
+                        "size": size,
+                        "link": lk['href']}
                 break  #get the first link just to test

         links = []

         if link is None and possible_more.name == 'ul':
-            bb = possible_more.findAll('li')
-            for x in bb:
-                volume = x.findNext(text=True)
-                if u'\u2013' in volume:
-                    volume = re.sub(u'\u2013', '-', volume)
-                linkline = x.find('a')
-                link = linkline['href']
-                site = linkline.findNext(text=True)
-                links.append({"volume": volume,
-                              "site": site,
-                              "link": link})
+            try:
+                bb = possible_more.findAll('li')
+            except:
+                pass
+            else:
+                for x in bb:
+                    linkline = x.find('a')
+                    if linkline:
+                        if 'go.php' in linkline['href']:
+                            volume = x.findNext(text=True)
+                            if u'\u2013' in volume:
+                                volume = re.sub(u'\u2013', '-', volume)
+                            #volume label contains series, issue(s), year(s), and size
+                            series_st = volume.find('(')
+                            issues_st = volume.find('#')
+                            series = volume[:series_st]
+                            if any([issues_st == -1, series_st == -1]):
+                                issues = None
+                            else:
+                                series = volume[:issues_st].strip()
+                                issues = volume[issues_st+1:series_st].strip()
+                            year_end = volume.find(')', series_st+1)
+                            year = re.sub('[\(\)]', '', volume[series_st+1: year_end]).strip()
+                            size_end = volume.find(')', year_end+1)
+                            size = re.sub('[\(\)]', '', volume[year_end+1: size_end]).strip()
+                            linked = linkline['href']
+                            site = linkline.findNext(text=True)
+                            if site == 'Main Server':
+                                links.append({"series": series,
+                                              "site": site,
+                                              "year": year,
+                                              "issues": issues,
+                                              "size": size,
+                                              "link": linked})
         else:
             check_extras = soup.findAll("h3")
             for sb in check_extras:
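
The new 'Main Server' handling above assumes each list-item label packs series, issue range, year(s) and size into one string. A worked example of that slicing, on a made-up label:

    import re

    volume = u'The Walking Dead #1 - 193 (2003-2019) (5.6 GB)'   # hypothetical label

    series_st = volume.find('(')      # start of the year block
    issues_st = volume.find('#')      # start of the issue range
    series = volume[:series_st]
    if any([issues_st == -1, series_st == -1]):
        issues = None
    else:
        series = volume[:issues_st].strip()
        issues = volume[issues_st+1:series_st].strip()
    year_end = volume.find(')', series_st+1)
    year = re.sub(r'[\(\)]', '', volume[series_st+1: year_end]).strip()
    size_end = volume.find(')', year_end+1)
    size = re.sub(r'[\(\)]', '', volume[year_end+1: size_end]).strip()

    print('%s | %s | %s | %s' % (series, issues, year, size))
    # The Walking Dead | 1 - 193 | 2003-2019 | 5.6 GB
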
@@ -220,40 +252,52 @@ class GC(object):
                     if u'\u2013' in volume:
                         volume = re.sub(u'\u2013', '-', volume)
                     linkline = x.find('a')
-                    link = linkline['href']
+                    linked = linkline['href']
                     site = linkline.findNext(text=True)
                     links.append({"volume": volume,
                                   "site": site,
-                                  "link": link})
+                                  "link": linked})

-        if link is None:
+        if all([link is None, len(links) == 0]):
             logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
             return {'success': False}

+        if all([link is not None, len(links) == 0]):
+            logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link)))
+            links = [link]
+        elif len(links) > 0:
+            if len(links) > 1:
+                logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
+
+        cnt = 1
         for x in links:
-            logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))
+            if len(links) == 1:
+                mod_id = id
+            else:
+                mod_id = id+'-'+str(cnt)
+            #logger.fdebug('[%s] %s (%s) %s [%s][%s]' % (x['site'], x['series'], x['year'], x['issues'], x['size'], x['link']))

-            ctrlval = {'id': id}
-            vals = {'series': series,
-                    'year': year,
-                    'size': size,
-                    'issueid': self.issueid,
-                    'comicid': self.comicid,
-                    'link': link,
-                    'mainlink': mainlink,
-                    'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
-                    'status': 'Queued'}
-            myDB.upsert('ddl_info', vals, ctrlval)
+            ctrlval = {'id': mod_id}
+            vals = {'series': x['series'],
+                    'year': x['year'],
+                    'size': x['size'],
+                    'issues': x['issues'],
+                    'issueid': self.issueid,
+                    'comicid': self.comicid,
+                    'link': x['link'],
+                    'mainlink': mainlink,
+                    'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
+                    'status': 'Queued'}
+            myDB.upsert('ddl_info', vals, ctrlval)

-            mylar.DDL_QUEUE.put({'link': link,
+            mylar.DDL_QUEUE.put({'link': x['link'],
                                  'mainlink': mainlink,
-                                 'series': series,
-                                 'year': year,
-                                 'size': size,
+                                 'series': x['series'],
+                                 'year': x['year'],
+                                 'size': x['size'],
                                  'comicid': self.comicid,
                                  'issueid': self.issueid,
-                                 'id': id,
+                                 'id': mod_id,
                                  'resume': None})
+            cnt+=1

         return {'success': True}
@@ -275,20 +319,23 @@ class GC(object):
         t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)

         filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
+        if 'GetComics.INFO' in filename:
+            filename = re.sub('GetComics.INFO', '', filename, re.I).strip()

         try:
             remote_filesize = int(t.headers['Content-length'])
             logger.fdebug('remote filesize: %s' % remote_filesize)
         except Exception as e:
-            logger.warn('[WARNING] Unable to retrieve remote file size. Error returned as : %s' % e)
+            logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
+            logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
             remote_filesize = 0
             mylar.DDL_LOCK = False
             return ({"success": False,
                      "filename": filename,
                      "path": None})

+        else:
             #write the filename to the db for tracking purposes...
             myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

             path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)

View File

@@ -24,6 +24,7 @@ from datetime import datetime, timedelta
 import gzip
 import time
 import random
+from bs4 import BeautifulSoup
 from StringIO import StringIO

 import mylar
@@ -384,6 +385,78 @@ def torrents(pickfeed=None, seriesname=None, issue=None, feedinfo=None):
         return torinfo
     return

+def ddl(forcerss=False):
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'}
+    ddl_feed = 'https://getcomics.info/feed/'
+
+    try:
+        r = requests.get(ddl_feed, verify=True, headers=headers)
+    except Exception, e:
+        logger.warn('Error fetching RSS Feed Data from DDL: %s' % (e))
+        return False
+    else:
+        if r.status_code != 200:
+            #typically 403 will not return results, but just catch anything other than a 200
+            if r.status_code == 403:
+                logger.warn('ERROR - status code:%s' % r.status_code)
+                return False
+            else:
+                logger.warn('[%s] Status code returned: %s' % (r.status_code))
+                return False
+
+    feedme = feedparser.parse(r.content)
+
+    results = []
+
+    for entry in feedme.entries:
+        soup = BeautifulSoup(entry.summary, 'html.parser')
+        orig_find = soup.find("p", {"style": "text-align: center;"})
+        i = 0
+        option_find = orig_find
+        while True: #i <= 10:
+            prev_option = option_find
+            option_find = option_find.findNext(text=True)
+            if 'Year' in option_find:
+                year = option_find.findNext(text=True)
+                year = re.sub('\|', '', year).strip()
+            else:
+                if 'Size' in prev_option:
+                    size = option_find #.findNext(text=True)
+                    if '- MB' in size: size = '0 MB'
+                    possible_more = orig_find.next_sibling
+                    break
+            i+=1
+
+        link = entry.link
+        title = entry.title
+        updated = entry.updated
+        if updated.endswith('+0000'):
+            updated = updated[:-5].strip()
+        tmpid = entry.id
+        id = tmpid[tmpid.find('=')+1:]
+        if 'KB' in size:
+            szform = 'KB'
+            sz = 'K'
+        elif 'GB' in size:
+            szform = 'GB'
+            sz = 'G'
+        elif 'MB' in size:
+            szform = 'MB'
+            sz = 'M'
+        elif 'TB' in size:
+            szform = 'TB'
+            sz = 'T'
+        tsize = helpers.human2bytes(re.sub('[^0-9]', '', size).strip() + sz)
+
+        #link can be referenced with the ?p=id url
+        results.append({'Title':   title,
+                        'Size':    tsize,
+                        'Link':    id,
+                        'Site':    'DDL',
+                        'Pubdate': updated})
+
+    if len(results) >0:
+        logger.info('[RSS][DDL] %s entries have been indexed and are now going to be stored for caching.' % len(results))
+        rssdbupdate(results, len(results), 'ddl')
+    return
+
 def nzbs(provider=None, forcerss=False):
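
The size scraped from each feed entry is reduced to digits plus a single unit letter before being handed to helpers.human2bytes for the cached Size value. A rough sketch of that normalization, with a stand-in converter since the real helper is not part of this diff:

    import re

    def approx_bytes(s):
        # Stand-in for mylar's helpers.human2bytes; assumed here to turn '150M'
        # into 150 * 1024**2. Illustrative only, not the actual implementation.
        units = {'K': 1, 'M': 2, 'G': 3, 'T': 4}
        return int(float(s[:-1]) * (1024 ** units[s[-1]]))

    size = '150 MB'        # value pulled out of the feed summary
    if 'KB' in size:
        sz = 'K'
    elif 'GB' in size:
        sz = 'G'
    elif 'MB' in size:
        sz = 'M'
    elif 'TB' in size:
        sz = 'T'
    tsize = approx_bytes(re.sub('[^0-9]', '', size).strip() + sz)
    print(tsize)           # 157286400
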
@@ -569,6 +642,43 @@ def rssdbupdate(feeddata, i, type):
     logger.fdebug('Completed adding new data to RSS DB. Next add in ' + str(mylar.CONFIG.RSS_CHECKINTERVAL) + ' minutes')
     return

+def ddl_dbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
+    myDB = db.DBConnection()
+    seriesname_alt = None
+    if any([comicid is None, comicid == 'None', oneoff is True]):
+        pass
+    else:
+        snm = myDB.selectone("SELECT * FROM comics WHERE comicid=?", [comicid]).fetchone()
+        if snm is None:
+            logger.fdebug('Invalid ComicID of %s. Aborting search' % comicid)
+            return "no results"
+        else:
+            seriesname = snm['ComicName']
+            seriesname_alt = snm['AlternateSearch']
+
+    dsearch_rem1 = re.sub("\\band\\b", "%", seriesname.lower())
+    dsearch_rem2 = re.sub("\\bthe\\b", "%", dsearch_rem1.lower())
+    dsearch_removed = re.sub('\s+', ' ', dsearch_rem2)
+    dsearch_seriesname = re.sub('[\'\!\@\#\$\%\:\-\;\/\\=\?\&\.\s\,]', '%', dsearch_removed)
+    dsearch = '%' + dsearch_seriesname + '%'
+
+    dresults = myDB.select("SELECT * FROM rssdb WHERE Title like ? AND Site='DDL'", [dsearch])
+    ddltheinfo = []
+    ddlinfo = {}
+    if not dresults:
+        return "no results"
+    else:
+        for dl in dresults:
+            ddltheinfo.append({
+                'title': dl['Title'],
+                'link': dl['Link'],
+                'pubdate': dl['Pubdate'],
+                'site': dl['Site'],
+                'length': dl['Size']
+            })
+
+        ddlinfo['entries'] = ddltheinfo
+
+    return ddlinfo
+
 def torrentdbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
     myDB = db.DBConnection()
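
ddl_dbsearch() collapses the series name into a SQL LIKE pattern so punctuation and the words 'and'/'the' in cached titles cannot block a match. A worked example with a hypothetical series name:

    import re

    seriesname = 'Batman and the Outsiders'    # hypothetical watchlist series

    dsearch_rem1 = re.sub(r"\band\b", "%", seriesname.lower())
    dsearch_rem2 = re.sub(r"\bthe\b", "%", dsearch_rem1.lower())
    dsearch_removed = re.sub(r'\s+', ' ', dsearch_rem2)
    dsearch_seriesname = re.sub(r'[\'\!\@\#\$\%\:\-\;\/\=\?\&\.\s\,]', '%', dsearch_removed)
    dsearch = '%' + dsearch_seriesname + '%'

    print(dsearch)
    # %batman%%%%%outsiders%  (repeated %s collapse under LIKE, so any cached
    # Title containing 'batman' followed by 'outsiders' will match)
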

View File

@@ -91,6 +91,9 @@ class tehMain():
                 logger.info('[RSS-FEEDS] Initiating RSS Feed Check for NZB Providers.')
                 rsscheck.nzbs(forcerss=forcerss)
+            if mylar.CONFIG.ENABLE_DDL is True:
+                logger.info('[RSS-FEEDS] Initiating RSS Feed Check for DDL Provider.')
+                rsscheck.ddl(forcerss=forcerss)
             logger.info('[RSS-FEEDS] RSS Feed Check/Update Complete')
             logger.info('[RSS-FEEDS] Watchlist Check for new Releases')
             mylar.search.searchforissue(rsscheck='yes')

View File

@@ -254,7 +254,6 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
         c_number = c_number[:decst].rstrip()

     while (srchloop <= searchcnt):
-        logger.fdebug('srchloop: %s' % srchloop)
         #searchmodes:
         # rss - will run through the built-cached db of entries
         # api - will run through the providers via api (or non-api in the case of Experimental)
@@ -334,9 +333,9 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
                     prov_count+=1
                     continue
                 if searchmode == 'rss':
-                    if searchprov.lower() == 'ddl':
-                        prov_count+=1
-                        continue
+                    #if searchprov.lower() == 'ddl':
+                    #    prov_count+=1
+                    #    continue
                     findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, send_prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle, unaltered_ComicName=unaltered_ComicName, oneoff=oneoff, cmloopit=cmloopit, manual=manual, torznab_host=torznab_host, digitaldate=digitaldate, booktype=booktype)
                     if findit['status'] is False:
                         if AlternateSearch is not None and AlternateSearch != "None":
@@ -581,7 +580,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                 foundc['status'] = False
                 done = True
                 break

-            if any([nzbprov == '32P', nzbprov == 'Public Torrents']):
+            if any([nzbprov == '32P', nzbprov == 'Public Torrents', nzbprov == 'ddl']):
                 #because 32p directly stores the exact issue, no need to worry about iterating over variations of the issue number.
                 findloop == 99
@@ -619,14 +618,17 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
     #logger.fdebug('RSS Check: %s' % RSS)
     #logger.fdebug('nzbprov: %s' % nzbprov)
     #logger.fdebug('comicid: %s' % ComicID)
-    if nzbprov == 'ddl':
+    if nzbprov == 'ddl' and RSS == "no":
         cmname = re.sub("%20", " ", str(comsrc))
         logger.fdebug('Sending request to DDL site for : %s %s' % (findcomic, isssearch))
         b = getcomics.GC(query='%s %s' % (findcomic, isssearch))
         bb = b.search()
         #logger.info('bb returned from DDL: %s' % bb)
     elif RSS == "yes":
-        if nzbprov == '32P' or nzbprov == 'Public Torrents':
+        if nzbprov == 'ddl':
+            logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
+            bb = rsscheck.ddl_dbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
+        elif nzbprov == '32P' or nzbprov == 'Public Torrents':
             cmname = re.sub("%20", " ", str(comsrc))
             logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
             bb = rsscheck.torrentdbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
@@ -1389,7 +1391,13 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                         nowrite = False
                         if all([nzbprov == 'torznab', 'worldwidetorrents' in entry['link']]):
                             nzbid = generate_id(nzbprov, entry['id'])
-                        elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]):
+                        elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]) or all([nzbprov == 'ddl', RSS == 'yes']):
+                            if RSS == "yes":
+                                entry['id'] = entry['link']
+                                entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
+                                entry['filename'] = entry['title']
+                            if '/cat/' in entry['link']:
+                                entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
                             nzbid = entry['id']
                             entry['title'] = entry['filename']
                         else:
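
Because the RSS cache keeps only the getcomics post id in its Link column, the block above rebuilds the real page URL from that id before the snatch. A small illustration with a hypothetical cached entry:

    # Hypothetical entry as returned from the rssdb cache (Link column holds the post id).
    entry = {'title': 'The Walking Dead #193 (2019)', 'link': '12345'}

    entry['id'] = entry['link']
    entry['link'] = 'https://getcomics.info/?p=' + str(entry['id'])
    entry['filename'] = entry['title']

    print(entry['link'])   # https://getcomics.info/?p=12345
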
@@ -2318,7 +2326,6 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
         ggc = getcomics.GC(issueid=IssueID, comicid=ComicID)
         sendsite = ggc.loadsite(nzbid, link)
         ddl_it = ggc.parse_downloadresults(nzbid, link)
-        logger.info("ddl status response: %s" % ddl_it)
         if ddl_it['success'] is True:
             logger.info('Successfully snatched %s from DDL site. It is currently being queued to download in position %s' % (nzbname, mylar.DDL_QUEUE.qsize()))
         else: