FIX: DDL provider attempting to use incorrect links when downloading, FIX: fixed some DDL problems due to various parsing / type problems, IMP: DDL provider will now follow the RSS feed option if it is enabled

This commit is contained in:
evilhero 2019-03-08 16:56:05 -05:00
parent ef278eac21
commit c06e96c129
4 changed files with 220 additions and 53 deletions


@@ -140,7 +140,7 @@ class GC(object):
nwsize = size.find('//')
size = re.sub('\[', '', size[:nwsize]).strip()
else:
size = '0 M'
size = '0M'
i+=1
dateline = f.find('time')
datefull = dateline['datetime']
@@ -163,15 +163,19 @@ class GC(object):
def parse_downloadresults(self, id, mainlink):
myDB = db.DBConnection()
series = None
year = None
size = None
title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
soup = BeautifulSoup(open(title+'.html'), 'html.parser')
orig_find = soup.find("p", {"style": "text-align: center;"})
i = 0
option_find = orig_find
possible_more = None
while True: #i <= 10:
prev_option = option_find
option_find = option_find.findNext(text=True)
if i == 0:
if i == 0 and series is None:
series = option_find
elif 'Year' in option_find:
year = option_find.findNext(text=True)
@@ -189,24 +193,52 @@ class GC(object):
for f in soup.findAll("div", {"class": "aio-pulse"}):
lk = f.find('a')
if lk['title'] == 'Download Now':
link = lk['href']
site = lk['title']
link = {"series": series,
"site": lk['title'],
"year": year,
"issues": None,
"size": size,
"link": lk['href']}
break #get the first link just to test
links = []
if link is None and possible_more.name == 'ul':
bb = possible_more.findAll('li')
for x in bb:
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
linkline = x.find('a')
link = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": link})
try:
bb = possible_more.findAll('li')
except:
pass
else:
for x in bb:
linkline = x.find('a')
if linkline:
if 'go.php' in linkline['href']:
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
#volume label contains series, issue(s), year(s), and size
series_st = volume.find('(')
issues_st = volume.find('#')
series = volume[:series_st]
if any([issues_st == -1, series_st == -1]):
issues = None
else:
series = volume[:issues_st].strip()
issues = volume[issues_st+1:series_st].strip()
year_end = volume.find(')', series_st+1)
year = re.sub('[\(\)]', '', volume[series_st+1: year_end]).strip()
size_end = volume.find(')', year_end+1)
size = re.sub('[\(\)]', '', volume[year_end+1: size_end]).strip()
linked = linkline['href']
site = linkline.findNext(text=True)
if site == 'Main Server':
links.append({"series": series,
"site": site,
"year": year,
"issues": issues,
"size": size,
"link": linked})
else:
check_extras = soup.findAll("h3")
for sb in check_extras:
@@ -220,40 +252,52 @@ class GC(object):
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
linkline = x.find('a')
link = linkline['href']
linked = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": link})
"link": linked})
if link is None:
if all([link is None, len(links) == 0]):
logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
return {'success': False}
if all([link is not None, len(links) == 0]):
logger.info('only one item discovered, changing queue length to accommodate: %s [%s]' % (link, type(link)))
links = [link]
elif len(links) > 0:
if len(links) > 1:
logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
cnt = 1
for x in links:
logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))
if len(links) == 1:
mod_id = id
else:
mod_id = id+'-'+str(cnt)
#logger.fdebug('[%s] %s (%s) %s [%s][%s]' % (x['site'], x['series'], x['year'], x['issues'], x['size'], x['link']))
ctrlval = {'id': id}
vals = {'series': series,
'year': year,
'size': size,
'issueid': self.issueid,
'comicid': self.comicid,
'link': link,
'mainlink': mainlink,
'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
'status': 'Queued'}
myDB.upsert('ddl_info', vals, ctrlval)
ctrlval = {'id': mod_id}
vals = {'series': x['series'],
'year': x['year'],
'size': x['size'],
'issues': x['issues'],
'issueid': self.issueid,
'comicid': self.comicid,
'link': x['link'],
'mainlink': mainlink,
'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
'status': 'Queued'}
myDB.upsert('ddl_info', vals, ctrlval)
mylar.DDL_QUEUE.put({'link': link,
'mainlink': mainlink,
'series': series,
'year': year,
'size': size,
'comicid': self.comicid,
'issueid': self.issueid,
'id': id,
'resume': None})
mylar.DDL_QUEUE.put({'link': x['link'],
'mainlink': mainlink,
'series': x['series'],
'year': x['year'],
'size': x['size'],
'comicid': self.comicid,
'issueid': self.issueid,
'id': mod_id,
'resume': None})
cnt+=1
return {'success': True}
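For reference, a minimal standalone sketch of the volume-label slicing that parse_downloadresults() now performs on the 'Main Server' list entries; the helper name and sample label below are illustrative only, not part of the commit.

def split_volume_label(volume):
    # labels look roughly like 'Series Name #1 - 20 (1972) (230 MB)' (sample format)
    series_st = volume.find('(')
    issues_st = volume.find('#')
    if series_st == -1 or issues_st == -1:
        return volume.strip(), None, None, None
    series = volume[:issues_st].strip()
    issues = volume[issues_st + 1:series_st].strip()
    year_end = volume.find(')', series_st + 1)
    year = volume[series_st + 1:year_end].strip()
    size_end = volume.find(')', year_end + 1)
    size = volume[year_end + 1:size_end].strip(' (')
    return series, issues, year, size

print(split_volume_label('Weird Western Tales #1 - 20 (1972) (230 MB)'))
# -> ('Weird Western Tales', '1 - 20', '1972', '230 MB')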
@@ -275,20 +319,23 @@ class GC(object):
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
filename = re.sub('GetComics.INFO', '', filename, flags=re.I).strip()
try:
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size. Error returned as : %s' % e)
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
else:
#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})
#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})
path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)
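A rough standalone illustration of the remote-size guard above, assuming only the requests library; the URL is a placeholder and the Mylar session/cookie handling is omitted.

import requests

t = requests.get('https://example.com/some-pack.zip', stream=True)   # placeholder URL
try:
    remote_filesize = int(t.headers['Content-length'])   # requests headers are case-insensitive
except Exception as e:
    # usually means the link resolved to a click-bait/ad page instead of the file
    remote_filesize = 0
if remote_filesize == 0:
    print('invalid download - ignoring this result')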


@@ -24,6 +24,7 @@ from datetime import datetime, timedelta
import gzip
import time
import random
from bs4 import BeautifulSoup
from StringIO import StringIO
import mylar
@@ -384,6 +385,78 @@ def torrents(pickfeed=None, seriesname=None, issue=None, feedinfo=None):
return torinfo
return
def ddl(forcerss=False):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'}
ddl_feed = 'https://getcomics.info/feed/'
try:
r = requests.get(ddl_feed, verify=True, headers=headers)
except Exception, e:
logger.warn('Error fetching RSS Feed Data from DDL: %s' % (e))
return False
else:
if r.status_code != 200:
#typically 403 will not return results, but just catch anything other than a 200
if r.status_code == 403:
logger.warn('ERROR - status code:%s' % r.status_code)
return False
else:
logger.warn('Status code returned: %s' % r.status_code)
return False
feedme = feedparser.parse(r.content)
results = []
for entry in feedme.entries:
soup = BeautifulSoup(entry.summary, 'html.parser')
orig_find = soup.find("p", {"style": "text-align: center;"})
i = 0
option_find = orig_find
while True: #i <= 10:
prev_option = option_find
option_find = option_find.findNext(text=True)
if 'Year' in option_find:
year = option_find.findNext(text=True)
year = re.sub('\|', '', year).strip()
else:
if 'Size' in prev_option:
size = option_find #.findNext(text=True)
if '- MB' in size: size = '0 MB'
possible_more = orig_find.next_sibling
break
i+=1
link = entry.link
title = entry.title
updated = entry.updated
if updated.endswith('+0000'):
updated = updated[:-5].strip()
tmpid = entry.id
id = tmpid[tmpid.find('=')+1:]
if 'KB' in size:
szform = 'KB'
sz = 'K'
elif 'GB' in size:
szform = 'GB'
sz = 'G'
elif 'MB' in size:
szform = 'MB'
sz = 'M'
elif 'TB' in size:
szform = 'TB'
sz = 'T'
tsize = helpers.human2bytes(re.sub('[^0-9]', '', size).strip() + sz)
#link can be referenced with the ?p=id url
results.append({'Title': title,
'Size': tsize,
'Link': id,
'Site': 'DDL',
'Pubdate': updated})
if len(results) >0:
logger.info('[RSS][DDL] %s entries have been indexed and are now going to be stored for caching.' % len(results))
rssdbupdate(results, len(results), 'ddl')
return
def nzbs(provider=None, forcerss=False):
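To make the new feed handling easier to follow, a rough sketch of how one RSS entry's size string and post id are normalized before caching; the sample values are invented and helpers.human2bytes() is approximated with a local table.

import re

size = '350 MB'                                # scraped from the entry summary (sample value)
tmpid = 'https://getcomics.info/?p=123456'     # entry.id from the feed (sample value)
post_id = tmpid[tmpid.find('=') + 1:]          # '123456' - stored as the Link column
units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}
sz = {'KB': 'K', 'MB': 'M', 'GB': 'G', 'TB': 'T'}[size.split()[-1]]
tsize = int(re.sub('[^0-9]', '', size)) * units[sz]   # 367001600; stands in for helpers.human2bytes('350M')
result = {'Title': 'Example Comic #1 (2019)',          # entry.title (sample value)
          'Size': tsize,
          'Link': post_id,
          'Site': 'DDL',
          'Pubdate': 'Fri, 08 Mar 2019 16:00:00'}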
@@ -569,6 +642,43 @@ def rssdbupdate(feeddata, i, type):
logger.fdebug('Completed adding new data to RSS DB. Next add in ' + str(mylar.CONFIG.RSS_CHECKINTERVAL) + ' minutes')
return
def ddl_dbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
myDB = db.DBConnection()
seriesname_alt = None
if any([comicid is None, comicid == 'None', oneoff is True]):
pass
else:
snm = myDB.selectone("SELECT * FROM comics WHERE comicid=?", [comicid]).fetchone()
if snm is None:
logger.fdebug('Invalid ComicID of %s. Aborting search' % comicid)
return "no results"
else:
seriesname = snm['ComicName']
seriesname_alt = snm['AlternateSearch']
dsearch_rem1 = re.sub("\\band\\b", "%", seriesname.lower())
dsearch_rem2 = re.sub("\\bthe\\b", "%", dsearch_rem1.lower())
dsearch_removed = re.sub('\s+', ' ', dsearch_rem2)
dsearch_seriesname = re.sub('[\'\!\@\#\$\%\:\-\;\/\\=\?\&\.\s\,]', '%', dsearch_removed)
dsearch = '%' + dsearch_seriesname + '%'
dresults = myDB.select("SELECT * FROM rssdb WHERE Title like ? AND Site='DDL'", [dsearch])
ddltheinfo = []
ddlinfo = {}
if not dresults:
return "no results"
else:
for dl in dresults:
ddltheinfo.append({
'title': dl['Title'],
'link': dl['Link'],
'pubdate': dl['Pubdate'],
'site': dl['Site'],
'length': dl['Size']
})
ddlinfo['entries'] = ddltheinfo
return ddlinfo
def torrentdbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
myDB = db.DBConnection()
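As a sketch of the matching done by ddl_dbsearch(): the series name is turned into a SQL LIKE pattern against the cached rssdb table. The wrapper function below is illustrative; the substitutions themselves mirror the diff.

import re

def build_ddl_like_pattern(seriesname):
    # drop 'and'/'the', collapse whitespace, then turn punctuation and spaces into wildcards
    s = re.sub(r'\band\b', '%', seriesname.lower())
    s = re.sub(r'\bthe\b', '%', s)
    s = re.sub(r'\s+', ' ', s)
    s = re.sub(r'[\'\!\@\#\$\%\:\-\;\/\\=\?\&\.\s\,]', '%', s)
    return '%' + s + '%'

print(build_ddl_like_pattern('Batman and the Outsiders'))   # '%batman%%%%%outsiders%'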


@@ -91,6 +91,9 @@ class tehMain():
logger.info('[RSS-FEEDS] Initiating RSS Feed Check for NZB Providers.')
rsscheck.nzbs(forcerss=forcerss)
if mylar.CONFIG.ENABLE_DDL is True:
logger.info('[RSS-FEEDS] Initiating RSS Feed Check for DDL Provider.')
rsscheck.ddl(forcerss=forcerss)
logger.info('[RSS-FEEDS] RSS Feed Check/Update Complete')
logger.info('[RSS-FEEDS] Watchlist Check for new Releases')
mylar.search.searchforissue(rsscheck='yes')


@@ -254,7 +254,6 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
c_number = c_number[:decst].rstrip()
while (srchloop <= searchcnt):
logger.fdebug('srchloop: %s' % srchloop)
#searchmodes:
# rss - will run through the built-cached db of entries
# api - will run through the providers via api (or non-api in the case of Experimental)
@@ -334,9 +333,9 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
prov_count+=1
continue
if searchmode == 'rss':
if searchprov.lower() == 'ddl':
prov_count+=1
continue
#if searchprov.lower() == 'ddl':
# prov_count+=1
# continue
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, send_prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle, unaltered_ComicName=unaltered_ComicName, oneoff=oneoff, cmloopit=cmloopit, manual=manual, torznab_host=torznab_host, digitaldate=digitaldate, booktype=booktype)
if findit['status'] is False:
if AlternateSearch is not None and AlternateSearch != "None":
@@ -581,7 +580,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
foundc['status'] = False
done = True
break
if any([nzbprov == '32P', nzbprov == 'Public Torrents']):
if any([nzbprov == '32P', nzbprov == 'Public Torrents', nzbprov == 'ddl']):
#because 32p directly stores the exact issue, no need to worry about iterating over variations of the issue number.
findloop = 99
@@ -619,14 +618,17 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
#logger.fdebug('RSS Check: %s' % RSS)
#logger.fdebug('nzbprov: %s' % nzbprov)
#logger.fdebug('comicid: %s' % ComicID)
if nzbprov == 'ddl':
if nzbprov == 'ddl' and RSS == "no":
cmname = re.sub("%20", " ", str(comsrc))
logger.fdebug('Sending request to DDL site for : %s %s' % (findcomic, isssearch))
b = getcomics.GC(query='%s %s' % (findcomic, isssearch))
bb = b.search()
#logger.info('bb returned from DDL: %s' % bb)
elif RSS == "yes":
if nzbprov == '32P' or nzbprov == 'Public Torrents':
if nzbprov == 'ddl':
logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
bb = rsscheck.ddl_dbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
elif nzbprov == '32P' or nzbprov == 'Public Torrents':
cmname = re.sub("%20", " ", str(comsrc))
logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
bb = rsscheck.torrentdbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
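For context on the dispatch above: when RSS == "yes" and nzbprov == 'ddl', rsscheck.ddl_dbsearch() hands back either the string 'no results' or a feedparser-style dict shaped roughly like the following (values invented):

bb = {'entries': [{'title': 'Example Comic #1 (2019)',
                   'link': '123456',                       # getcomics post id from the rssdb cache
                   'pubdate': 'Fri, 08 Mar 2019 16:00:00',
                   'site': 'DDL',
                   'length': 367001600}]}                   # cached size in bytes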
@@ -1389,7 +1391,13 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
nowrite = False
if all([nzbprov == 'torznab', 'worldwidetorrents' in entry['link']]):
nzbid = generate_id(nzbprov, entry['id'])
elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]):
elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]) or all([nzbprov == 'ddl', RSS == 'yes']):
if RSS == "yes":
entry['id'] = entry['link']
entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
entry['filename'] = entry['title']
if '/cat/' in entry['link']:
entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
nzbid = entry['id']
entry['title'] = entry['filename']
else:
@@ -2318,7 +2326,6 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
ggc = getcomics.GC(issueid=IssueID, comicid=ComicID)
sendsite = ggc.loadsite(nzbid, link)
ddl_it = ggc.parse_downloadresults(nzbid, link)
logger.info("ddl status response: %s" % ddl_it)
if ddl_it['success'] is True:
logger.info('Successfully snatched %s from DDL site. It is currently being queued to download in position %s' % (nzbname, mylar.DDL_QUEUE.qsize()))
else:
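Finally, to show how the RSS path ties back into the snatcher: a short sketch (sample values only) of the link rebuild the new RSS == "yes" branch in NZB_SEARCH performs before searcher() hands the result to GC.loadsite() and parse_downloadresults().

entry = {'title': 'Example Comic #1 (2019)', 'link': '123456'}   # as returned from the rssdb cache
entry['id'] = entry['link']                                      # the cached value is the post id
entry['link'] = 'https://getcomics.info/?p=' + str(entry['id'])  # rebuilt direct link
entry['filename'] = entry['title']                               # RSS entries carry no filename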