mirror of
https://github.com/evilhero/mylar
synced 2025-03-06 19:38:02 +00:00
FIX: fix for DDL provider attempting to use incorrect links when downloading, FIX: fixed some DDL problems due to various parsing / type problems, IMP: DDL provider will now follow the RSS feed option if it is enabled
parent ef278eac21
commit c06e96c129
4 changed files with 220 additions and 53 deletions
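In rough terms, the behaviour the commit message describes works like the sketch below. This is an illustrative summary only, not code from the commit; the two wrapper functions are hypothetical, while the config flag and the rsscheck/getcomics calls are the ones that appear in the diff hunks that follow.

def rss_pass(config, rsscheck, forcerss=False):
    # scheduled RSS pass: the NZB feed check already existed
    rsscheck.nzbs(forcerss=forcerss)
    # new: when the DDL provider is enabled, also pull the getcomics feed
    # and cache its entries (Site='DDL') for later rss-mode searches
    if config.ENABLE_DDL is True:
        rsscheck.ddl(forcerss=forcerss)

def ddl_search(rsscheck, getcomics, name, issue, comicid, searchmode):
    if searchmode == 'rss':
        # new: query the cached feed entries instead of skipping the provider
        return rsscheck.ddl_dbsearch(name, issue, comicid, 'ddl')
    # api mode still scrapes the getcomics site directly
    return getcomics.GC(query='%s %s' % (name, issue)).search()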
@@ -140,7 +140,7 @@ class GC(object):
nwsize = size.find('//')
size = re.sub('\[', '', size[:nwsize]).strip()
else:
size = '0 M'
size = '0M'
i+=1
dateline = f.find('time')
datefull = dateline['datetime']
@@ -163,15 +163,19 @@ class GC(object):

def parse_downloadresults(self, id, mainlink):
myDB = db.DBConnection()
series = None
year = None
size = None
title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
soup = BeautifulSoup(open(title+'.html'), 'html.parser')
orig_find = soup.find("p", {"style": "text-align: center;"})
i = 0
option_find = orig_find
possible_more = None
while True: #i <= 10:
prev_option = option_find
option_find = option_find.findNext(text=True)
if i == 0:
if i == 0 and series is None:
series = option_find
elif 'Year' in option_find:
year = option_find.findNext(text=True)
@@ -189,24 +193,52 @@ class GC(object):
for f in soup.findAll("div", {"class": "aio-pulse"}):
lk = f.find('a')
if lk['title'] == 'Download Now':
link = lk['href']
site = lk['title']
link = {"series": series,
"site": lk['title'],
"year": year,
"issues": None,
"size": size,
"link": lk['href']}

break #get the first link just to test

links = []

if link is None and possible_more.name == 'ul':
bb = possible_more.findAll('li')
for x in bb:
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
linkline = x.find('a')
link = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": link})
try:
bb = possible_more.findAll('li')
except:
pass
else:
for x in bb:
linkline = x.find('a')
if linkline:
if 'go.php' in linkline['href']:
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
#volume label contains series, issue(s), year(s), and size
series_st = volume.find('(')
issues_st = volume.find('#')
series = volume[:series_st]
if any([issues_st == -1, series_st == -1]):
issues = None
else:
series = volume[:issues_st].strip()
issues = volume[issues_st+1:series_st].strip()
year_end = volume.find(')', series_st+1)
year = re.sub('[\(\)]', '', volume[series_st+1: year_end]).strip()
size_end = volume.find(')', year_end+1)
size = re.sub('[\(\)]', '', volume[year_end+1: size_end]).strip()
linked = linkline['href']
site = linkline.findNext(text=True)
if site == 'Main Server':
links.append({"series": series,
"site": site,
"year": year,
"issues": issues,
"size": size,
"link": linked})
else:
check_extras = soup.findAll("h3")
for sb in check_extras:
@@ -220,40 +252,52 @@ class GC(object):
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
linkline = x.find('a')
link = linkline['href']
linked = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": link})
"link": linked})

if link is None:
if all([link is None, len(links) == 0]):
logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
return {'success': False}

if all([link is not None, len(links) == 0]):
logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link)))
links = [link]
elif len(links) > 0:
if len(links) > 1:
logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
cnt = 1
for x in links:
logger.fdebug('[%s] %s - %s' % (x['site'], x['volume'], x['link']))
if len(links) == 1:
mod_id = id
else:
mod_id = id+'-'+str(cnt)
#logger.fdebug('[%s] %s (%s) %s [%s][%s]' % (x['site'], x['series'], x['year'], x['issues'], x['size'], x['link']))

ctrlval = {'id': id}
vals = {'series': series,
'year': year,
'size': size,
'issueid': self.issueid,
'comicid': self.comicid,
'link': link,
'mainlink': mainlink,
'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
'status': 'Queued'}
myDB.upsert('ddl_info', vals, ctrlval)
ctrlval = {'id': mod_id}
vals = {'series': x['series'],
'year': x['year'],
'size': x['size'],
'issues': x['issues'],
'issueid': self.issueid,
'comicid': self.comicid,
'link': x['link'],
'mainlink': mainlink,
'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
'status': 'Queued'}
myDB.upsert('ddl_info', vals, ctrlval)

mylar.DDL_QUEUE.put({'link': link,
'mainlink': mainlink,
'series': series,
'year': year,
'size': size,
'comicid': self.comicid,
'issueid': self.issueid,
'id': id,
'resume': None})
mylar.DDL_QUEUE.put({'link': x['link'],
'mainlink': mainlink,
'series': x['series'],
'year': x['year'],
'size': x['size'],
'comicid': self.comicid,
'issueid': self.issueid,
'id': mod_id,
'resume': None})
cnt+=1

return {'success': True}
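A hedged sketch of the multi-pack queueing shown above: when a single getcomics post exposes several download links, each link is queued under a derived id ('<id>-1', '<id>-2', ...) so the ddl_info rows and queue entries stay distinct. The wrapper below is illustrative only; the real code also upserts series/year/size per link and hands the dict to mylar.DDL_QUEUE.

def queue_ddl_links(ddl_id, links, put):
    # put() stands in for mylar.DDL_QUEUE.put / myDB.upsert in the real code
    cnt = 1
    for x in links:
        mod_id = ddl_id if len(links) == 1 else '%s-%s' % (ddl_id, cnt)
        put({'id': mod_id, 'link': x['link'], 'resume': None})
        cnt += 1

# with ddl_id='12345' and two links, ids '12345-1' and '12345-2' are queued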
@@ -275,20 +319,23 @@ class GC(object):
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)

filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
filename = re.sub('GetComics.INFO', '', filename, re.I).strip()

try:
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size. Error returned as : %s' % e)
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
else:
#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)
@@ -24,6 +24,7 @@ from datetime import datetime, timedelta
import gzip
import time
import random
from bs4 import BeautifulSoup
from StringIO import StringIO

import mylar
@@ -384,6 +385,78 @@ def torrents(pickfeed=None, seriesname=None, issue=None, feedinfo=None):
return torinfo
return

def ddl(forcerss=False):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'}
ddl_feed = 'https://getcomics.info/feed/'
try:
r = requests.get(ddl_feed, verify=True, headers=headers)
except Exception, e:
logger.warn('Error fetching RSS Feed Data from DDL: %s' % (e))
return False
else:
if r.status_code != 200:
#typically 403 will not return results, but just catch anything other than a 200
if r.status_code == 403:
logger.warn('ERROR - status code:%s' % r.status_code)
return False
else:
logger.warn('[%s] Status code returned: %s' % (r.status_code))
return False

feedme = feedparser.parse(r.content)
results = []
for entry in feedme.entries:
soup = BeautifulSoup(entry.summary, 'html.parser')
orig_find = soup.find("p", {"style": "text-align: center;"})
i = 0
option_find = orig_find
while True: #i <= 10:
prev_option = option_find
option_find = option_find.findNext(text=True)
if 'Year' in option_find:
year = option_find.findNext(text=True)
year = re.sub('\|', '', year).strip()
else:
if 'Size' in prev_option:
size = option_find #.findNext(text=True)
if '- MB' in size: size = '0 MB'
possible_more = orig_find.next_sibling
break
i+=1

link = entry.link
title = entry.title
updated = entry.updated
if updated.endswith('+0000'):
updated = updated[:-5].strip()
tmpid = entry.id
id = tmpid[tmpid.find('=')+1:]
if 'KB' in size:
szform = 'KB'
sz = 'K'
elif 'GB' in size:
szform = 'GB'
sz = 'G'
elif 'MB' in size:
szform = 'MB'
sz = 'M'
elif 'TB' in size:
szform = 'TB'
sz = 'T'
tsize = helpers.human2bytes(re.sub('[^0-9]', '', size).strip() + sz)

#link can be referenced with the ?p=id url
results.append({'Title': title,
'Size': tsize,
'Link': id,
'Site': 'DDL',
'Pubdate': updated})

if len(results) >0:
logger.info('[RSS][DDL] %s entries have been indexed and are now going to be stored for caching.' % len(results))
rssdbupdate(results, len(results), 'ddl')

return

def nzbs(provider=None, forcerss=False):
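The feed parser above normalizes the human-readable size from each post ('150 MB', '1.2 GB', ...) by stripping non-digits, appending a one-letter unit suffix, and handing the result to helpers.human2bytes. A rough standalone equivalent of that conversion, for illustration only (this is not Mylar's actual helper):

import re

def approx_size_to_bytes(size_text):
    # map a '<number> <unit>B' string to bytes using binary multipliers
    units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}
    match = re.match(r'([\d.]+)\s*([KMGT])B', size_text.strip(), re.I)
    if match is None:
        return 0
    return int(float(match.group(1)) * units[match.group(2).upper()])

# approx_size_to_bytes('150 MB') -> 157286400
# approx_size_to_bytes('1.2 GB') -> 1288490188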
@@ -569,6 +642,43 @@ def rssdbupdate(feeddata, i, type):
logger.fdebug('Completed adding new data to RSS DB. Next add in ' + str(mylar.CONFIG.RSS_CHECKINTERVAL) + ' minutes')
return

def ddl_dbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
myDB = db.DBConnection()
seriesname_alt = None
if any([comicid is None, comicid == 'None', oneoff is True]):
pass
else:
snm = myDB.selectone("SELECT * FROM comics WHERE comicid=?", [comicid]).fetchone()
if snm is None:
logger.fdebug('Invalid ComicID of %s. Aborting search' % comicid)
return "no results"
else:
seriesname = snm['ComicName']
seriesname_alt = snm['AlternateSearch']

dsearch_rem1 = re.sub("\\band\\b", "%", seriesname.lower())
dsearch_rem2 = re.sub("\\bthe\\b", "%", dsearch_rem1.lower())
dsearch_removed = re.sub('\s+', ' ', dsearch_rem2)
dsearch_seriesname = re.sub('[\'\!\@\#\$\%\:\-\;\/\\=\?\&\.\s\,]', '%', dsearch_removed)
dsearch = '%' + dsearch_seriesname + '%'
dresults = myDB.select("SELECT * FROM rssdb WHERE Title like ? AND Site='DDL'", [dsearch])
ddltheinfo = []
ddlinfo = {}
if not dresults:
return "no results"
else:
for dl in dresults:
ddltheinfo.append({
'title': dl['Title'],
'link': dl['Link'],
'pubdate': dl['Pubdate'],
'site': dl['Site'],
'length': dl['Size']
})

ddlinfo['entries'] = ddltheinfo

return ddlinfo

def torrentdbsearch(seriesname, issue, comicid=None, nzbprov=None, oneoff=False):
myDB = db.DBConnection()
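ddl_dbsearch() matches cached RSS titles loosely: 'and'/'the' and most punctuation and whitespace in the series name become '%' wildcards before the LIKE query. A hedged, self-contained sketch of that pattern construction (the wrapper name is illustrative; the regex chain mirrors the lines above):

import re

def ddl_like_pattern(seriesname):
    # lower-case, drop 'and'/'the', collapse whitespace, then wildcard punctuation
    s = re.sub(r"\band\b", "%", seriesname.lower())
    s = re.sub(r"\bthe\b", "%", s)
    s = re.sub(r"\s+", " ", s)
    s = re.sub(r"['!@#$%:\-;/\\=?&.\s,]", "%", s)
    return "%" + s + "%"

# ddl_like_pattern('The Amazing Spider-Man') -> '%%%amazing%spider%man%'
# (consecutive % wildcards behave like a single one in SQLite LIKE)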
@@ -91,6 +91,9 @@ class tehMain():

logger.info('[RSS-FEEDS] Initiating RSS Feed Check for NZB Providers.')
rsscheck.nzbs(forcerss=forcerss)
if mylar.CONFIG.ENABLE_DDL is True:
logger.info('[RSS-FEEDS] Initiating RSS Feed Check for DDL Provider.')
rsscheck.ddl(forcerss=forcerss)
logger.info('[RSS-FEEDS] RSS Feed Check/Update Complete')
logger.info('[RSS-FEEDS] Watchlist Check for new Releases')
mylar.search.searchforissue(rsscheck='yes')
@@ -254,7 +254,6 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
c_number = c_number[:decst].rstrip()

while (srchloop <= searchcnt):
logger.fdebug('srchloop: %s' % srchloop)
#searchmodes:
# rss - will run through the built-cached db of entries
# api - will run through the providers via api (or non-api in the case of Experimental)
@@ -334,9 +333,9 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
prov_count+=1
continue
if searchmode == 'rss':
if searchprov.lower() == 'ddl':
prov_count+=1
continue
#if searchprov.lower() == 'ddl':
# prov_count+=1
# continue
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, send_prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle, unaltered_ComicName=unaltered_ComicName, oneoff=oneoff, cmloopit=cmloopit, manual=manual, torznab_host=torznab_host, digitaldate=digitaldate, booktype=booktype)
if findit['status'] is False:
if AlternateSearch is not None and AlternateSearch != "None":
@@ -581,7 +580,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
foundc['status'] = False
done = True
break
if any([nzbprov == '32P', nzbprov == 'Public Torrents']):
if any([nzbprov == '32P', nzbprov == 'Public Torrents', nzbprov == 'ddl']):
#because 32p directly stores the exact issue, no need to worry about iterating over variations of the issue number.
findloop == 99
@@ -619,14 +618,17 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
#logger.fdebug('RSS Check: %s' % RSS)
#logger.fdebug('nzbprov: %s' % nzbprov)
#logger.fdebug('comicid: %s' % ComicID)
if nzbprov == 'ddl':
if nzbprov == 'ddl' and RSS == "no":
cmname = re.sub("%20", " ", str(comsrc))
logger.fdebug('Sending request to DDL site for : %s %s' % (findcomic, isssearch))
b = getcomics.GC(query='%s %s' % (findcomic, isssearch))
bb = b.search()
#logger.info('bb returned from DDL: %s' % bb)
elif RSS == "yes":
if nzbprov == '32P' or nzbprov == 'Public Torrents':
if nzbprov == 'ddl':
logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
bb = rsscheck.ddl_dbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
elif nzbprov == '32P' or nzbprov == 'Public Torrents':
cmname = re.sub("%20", " ", str(comsrc))
logger.fdebug('Sending request to [%s] RSS for %s : %s' % (nzbprov, ComicName, mod_isssearch))
bb = rsscheck.torrentdbsearch(ComicName, mod_isssearch, ComicID, nzbprov, oneoff)
@@ -1389,7 +1391,13 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
nowrite = False
if all([nzbprov == 'torznab', 'worldwidetorrents' in entry['link']]):
nzbid = generate_id(nzbprov, entry['id'])
elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]):
elif all([nzbprov == 'ddl', 'getcomics' in entry['link']]) or all([nzbprov == 'ddl', RSS == 'yes']):
if RSS == "yes":
entry['id'] = entry['link']
entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
entry['filename'] = entry['title']
if '/cat/' in entry['link']:
entry['link'] = 'https://getcomics.info/?p='+str(entry['id'])
nzbid = entry['id']
entry['title'] = entry['filename']
else:
@@ -2318,7 +2326,6 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
ggc = getcomics.GC(issueid=IssueID, comicid=ComicID)
sendsite = ggc.loadsite(nzbid, link)
ddl_it = ggc.parse_downloadresults(nzbid, link)
logger.info("ddl status response: %s" % ddl_it)
if ddl_it['success'] is True:
logger.info('Successfully snatched %s from DDL site. It is currently being queued to download in position %s' % (nzbname, mylar.DDL_QUEUE.qsize()))
else: