Merge branch 'development'

evilhero 2019-03-29 10:03:45 -04:00
commit 3f30faee32
8 changed files with 154 additions and 75 deletions

View File

@@ -1420,11 +1420,14 @@ class PostProcessor(object):
if self.oneoffinlist is False:
self.oneoff = False
if any([self.issueid is not None, self.issuearcid is not None]):
if self.issueid is not None:
s_id = self.issueid
else:
if self.issuearcid is not None:
s_id = self.issuearcid
else:
s_id = self.issueid
nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', [s_id]).fetchone()
if nzbiss is None and self.issuearcid is not None:
nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', ['S'+s_id]).fetchone()
else:
nzbname = self.nzb_name
#remove extensions from nzb_name if they somehow got through (Experimental most likely)
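Illustration (not part of the commit): whichever of issueid / issuearcid ends up in s_id, the nzblog lookup above now falls back to the 'S'-prefixed form of the id when the plain lookup returns nothing for an arc download. A minimal sketch of just that fallback, using the table and column names from the diff and a hypothetical helper name:

def find_nzblog_entry(myDB, s_id, issuearcid=None):
    # first try the id as-is, then retry with the 'S'-prefixed form used for story-arc entries
    nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', [s_id]).fetchone()
    if nzbiss is None and issuearcid is not None:
        nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', ['S' + s_id]).fetchone()
    return nzbiss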

View File

@@ -418,11 +418,11 @@ class info32p(object):
if str(r.status_code) != '200':
logger.warn('Unable to download torrent from 32P [Status Code returned: %s]' % r.status_code)
if str(r.status_code) == '404' and site == '32P':
if str(r.status_code) == '404':
logger.warn('[32P-CACHED_ENTRY] Entry found in 32P cache - incorrect. Torrent has probably been merged into a pack, or another series id. Removing from cache.')
helpers.delete_cache_entry(linkit)
self.delete_cache_entry(payload['id'])
else:
logger.info('content: %s' % r.content)
logger.fdebug('content: %s' % r.content)
return False

View File

@@ -408,7 +408,7 @@ class FileChecker(object):
lastmod_position = 0
booktype = 'issue'
#exceptions that are considered alpha-numeric issue numbers
exceptions = ('NOW', 'AI', 'AU', 'X', 'A', 'B', 'C', 'INH', 'MU', 'HU', 'SUMMER', 'SPRING', 'FALL', 'WINTER')
exceptions = ('NOW', 'AI', 'AU', 'X', 'A', 'B', 'C', 'INH', 'MU', 'HU', 'SUMMER', 'SPRING', 'FALL', 'WINTER', 'PREVIEW')
#unicode characters, followed by int value
# num_exceptions = [{iss:u'\xbd',val:.5},{iss:u'\xbc',val:.25}, {iss:u'\xbe',val:.75}, {iss:u'\u221e',val:'infinity'}]
@@ -416,11 +416,11 @@ class FileChecker(object):
file_length = 0
validcountchk = False
sep_volume = False
current_pos = -1
current_pos = -1
for sf in split_file:
current_pos +=1
#the series title will always be first and be AT LEAST one word.
if split_file.index(sf) >= 1 and not volumeprior:
if split_file.index(sf) >= 0 and not volumeprior:
dtcheck = re.sub('[\(\)\,]', '', sf).strip()
#if there's more than one date, assume the right-most date is the actual issue date.
if any(['19' in dtcheck, '20' in dtcheck]) and not any([dtcheck.lower().startswith('v19'), dtcheck.lower().startswith('v20')]) and len(dtcheck) >=4:
@@ -775,11 +775,11 @@ class FileChecker(object):
for x in possible_years:
logger.info('yearposition[%s] -- dc[position][%s]' % (yearposition, x['yearposition']))
if yearposition < x['yearposition']:
if all([len(possible_issuenumbers) == 1, possible_issuenumbers[0]['number'] == x['year'], x['yearposition'] != possible_issuenumbers[0]['position']]):
if all([len(possible_issuenumbers) == 1, possible_issuenumbers[0]['number'] == x['year'], x['yearposition'] != possible_issuenumbers[0]['position']]):
issue2year = True
highest_series_pos = x['yearposition']
yearposition = x['yearposition']
yearmodposition = x['yearmodposition']
yearposition = x['yearposition']
yearmodposition = x['yearmodposition']
if highest_series_pos > yearposition: highest_series_pos = yearposition #dc['position']: highest_series_pos = dc['position']
else:
@@ -790,7 +790,6 @@ class FileChecker(object):
logger.fdebug('highest_series_position: ' + str(highest_series_pos))
issue_number = None
dash_numbers = []
issue_number_position = len(split_file)
@@ -811,7 +810,7 @@ class FileChecker(object):
for pis in sorted(possible_issuenumbers, key=operator.itemgetter('position'), reverse=True):
a = ' '.join(split_file)
lenn = pis['mod_position'] + len(pis['number'])
if lenn == len(a):
if lenn == len(a) and finddash != -1:
logger.fdebug('Numeric detected as the last digit after a hyphen. Typically this is the issue number.')
if pis['position'] != yearposition:
issue_number = pis['number']
@@ -819,20 +818,20 @@ class FileChecker(object):
issue_number_position = pis['position']
if highest_series_pos > pis['position']: highest_series_pos = pis['position']
#break
if pis['validcountchk'] == True:
elif pis['validcountchk'] == True:
issue_number = pis['number']
issue_number_position = pis['position']
logger.fdebug('Issue verified and detected as part of a numeric count sequence: ' + issue_number)
if highest_series_pos > pis['position']: highest_series_pos = pis['position']
break
if pis['mod_position'] > finddash and finddash != -1:
elif pis['mod_position'] > finddash and finddash != -1:
if finddash < yearposition and finddash > (yearmodposition + len(split_file[yearposition])):
logger.fdebug('issue number is positioned after a dash - probably not an issue number, but part of an issue title')
dash_numbers.append({'mod_position': pis['mod_position'],
'number': pis['number'],
'position': pis['position']})
continue
if yearposition == pis['position']:
elif yearposition == pis['position']:
logger.fdebug('Already validated year, ignoring as possible issue number: ' + str(pis['number']))
continue
if p == 1:
@@ -934,8 +933,10 @@ class FileChecker(object):
break
else:
try:
if possible_years[0]['yearposition'] <= highest_series_pos:
if possible_years[0]['yearposition'] <= highest_series_pos and possible_years[0]['yearposition'] != 0:
highest_series_pos = possible_years[0]['yearposition']
elif possible_years[0]['yearposition'] == 0:
yearposition = 1
except:
pass
@@ -1013,7 +1014,14 @@ class FileChecker(object):
#here we should account for some characters that get stripped out due to the regex's
#namely, unique characters - known so far: +
#c1 = '+'
series_name = ' '.join(split_file[:highest_series_pos])
#series_name = ' '.join(split_file[:highest_series_pos])
if yearposition != 0:
series_name = ' '.join(split_file[:highest_series_pos])
else:
if highest_series_pos <= issue_number_position and all([len(split_file[0]) == 4, split_file[0].isdigit()]):
series_name = ' '.join(split_file[:highest_series_pos])
else:
series_name = ' '.join(split_file[yearposition+1:highest_series_pos])
for x in list(wrds):
if x != '':
@@ -1060,11 +1068,19 @@ class FileChecker(object):
#check for annual in title(s) here.
if not self.justparse and all([mylar.CONFIG.ANNUALS_ON, 'annual' not in self.watchcomic.lower(), 'special' not in self.watchcomic.lower()]):
if 'annual' in series_name.lower():
issue_number = 'Annual ' + str(issue_number)
isn = 'Annual'
if issue_number is not None:
issue_number = '%s %s' % (isn, issue_number)
else:
issue_number = isn
series_name = re.sub('annual', '', series_name, flags=re.I).strip()
series_name_decoded = re.sub('annual', '', series_name_decoded, flags=re.I).strip()
elif 'special' in series_name.lower():
issue_number = 'Special ' + str(issue_number)
isn = 'Special'
if issue_number is not None:
issue_number = '%s %s' % (isn, issue_number)
else:
issue_number = isn
series_name = re.sub('special', '', series_name, flags=re.I).strip()
series_name_decoded = re.sub('special', '', series_name_decoded, flags=re.I).strip()
@@ -1179,7 +1195,9 @@ class FileChecker(object):
if mylar.CONFIG.ANNUALS_ON and 'annual' not in nspace_watchcomic.lower():
if 'annual' in series_name.lower():
justthedigits = 'Annual ' + series_info['issue_number']
justthedigits = 'Annual'
if series_info['issue_number'] is not None:
justthedigits += ' %s' % series_info['issue_number']
nspace_seriesname = re.sub('annual', '', nspace_seriesname.lower()).strip()
nspace_seriesname_decoded = re.sub('annual', '', nspace_seriesname_decoded.lower()).strip()
if alt_series is not None and 'annual' in alt_series.lower():

View File

@@ -34,7 +34,7 @@ from mylar import db
class GC(object):
def __init__(self, query=None, issueid=None, comicid=None):
def __init__(self, query=None, issueid=None, comicid=None, oneoff=False):
self.valreturn = []
@@ -46,6 +46,8 @@ class GC(object):
self.issueid = issueid
self.oneoff = oneoff
self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")
self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}
@@ -55,7 +57,7 @@ class GC(object):
with cfscrape.create_scraper() as s:
cf_cookievalue, cf_user_agent = s.get_tokens(self.url, headers=self.headers)
t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)
with open(self.local_filename, 'wb') as f:
for chunk in t.iter_content(chunk_size=1024):
@@ -70,7 +72,7 @@ class GC(object):
with cfscrape.create_scraper() as s:
self.cf_cookievalue, cf_user_agent = s.get_tokens(link, headers=self.headers)
t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True, timeout=30)
with open(title+'.html', 'wb') as f:
for chunk in t.iter_content(chunk_size=1024):
@@ -251,12 +253,24 @@ class GC(object):
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
series_st = volume.find('(')
issues_st = volume.find('#')
series = volume[:issues_st].strip()
issues = volume[issues_st:series_st].strip()
year_end = volume.find(')', series_st+1)
year = re.sub('[\(\)\|]', '', volume[series_st+1: year_end]).strip()
size_end = volume.find(')', year_end+1)
size = re.sub('[\(\)\|]', '', volume[year_end+1: size_end]).strip()
linkline = x.find('a')
linked = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": linked})
links.append({"series": series,
"volume": volume,
"site": site,
"year": year,
"issues": issues,
"size": size,
"link": linked})
if all([link is None, len(links) == 0]):
logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
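Illustration (not part of the commit): the new slicing above splits a result heading into series / issues / year / size using the positions of '#' and the bracketed groups. With a made-up heading (the title and size below are hypothetical), the same slicing gives:

import re

volume = 'Batman #1 - 57 (2016) (1.2 GB)'        # hypothetical sample heading
series_st = volume.find('(')
issues_st = volume.find('#')
series = volume[:issues_st].strip()                                   # 'Batman'
issues = volume[issues_st:series_st].strip()                          # '#1 - 57'
year_end = volume.find(')', series_st+1)
year = re.sub('[\(\)\|]', '', volume[series_st+1:year_end]).strip()   # '2016'
size_end = volume.find(')', year_end+1)
size = re.sub('[\(\)\|]', '', volume[year_end+1:size_end]).strip()    # '1.2 GB'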
@@ -265,8 +279,11 @@ class GC(object):
logger.info('only one item discovered, changing queue length to accommodate: %s [%s]' % (link, type(link)))
links = [link]
elif len(links) > 0:
if link is not None:
links.append(link)
logger.fdebug('[DDL-QUEUE] Making sure we download the original item in addition to the extra packs.')
if len(links) > 1:
logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
logger.fdebug('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
cnt = 1
for x in links:
if len(links) == 1:
@@ -295,6 +312,7 @@ class GC(object):
'size': x['size'],
'comicid': self.comicid,
'issueid': self.issueid,
'oneoff': self.oneoff,
'id': mod_id,
'resume': None})
cnt+=1
@@ -302,6 +320,7 @@ class GC(object):
return {'success': True}
def downloadit(self, id, link, mainlink, resume=None):
#logger.info('[%s] %s -- mainlink: %s' % (id, link, mainlink))
if mylar.DDL_LOCK is True:
logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
return
@@ -315,8 +334,8 @@ class GC(object):
if resume is not None:
logger.info('[DDL-RESUME] Attempting to resume from: %s bytes' % resume)
self.headers['Range'] = 'bytes=%d-' % resume
cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers, timeout=30)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)
filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
@@ -326,13 +345,32 @@ class GC(object):
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
if 'go.php-urls' not in link:
link = re.sub('go.php-url=', 'go.php-urls', link)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)
filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
filename = re.sub('GetComics.INFO', '', filename, re.I).strip()
try:
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
else:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

View File

@@ -397,6 +397,7 @@ def rename_param(comicid, comicname, issue, ofilename, comicyear=None, issueid=N
'NOW',
'AI',
'MU',
'HU',
'A',
'B',
'C',
@@ -1024,8 +1025,11 @@ def issuedigits(issnum):
x = [vals[key] for key in vals if key in issnum]
if x:
#logger.fdebug('Unicode Issue present - adjusting.')
int_issnum = x[0] * 1000
chk = re.sub('[^0-9]', '', issnum).strip()
if len(chk) == 0:
int_issnum = x[0] * 1000
else:
int_issnum = (int(re.sub('[^0-9]', '', issnum).strip()) + x[0]) * 1000
#logger.fdebug('int_issnum: ' + str(int_issnum))
else:
if any(['.' in issnum, ',' in issnum]):
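Sketch (not part of the commit, with an assumed minimal vals map of unicode fraction characters): the new branch keeps the old behaviour for a bare fraction character, but now adds the leading digits when something like u'1\xbd' ("1½") comes through:

import re

vals = {u'\xbd': .5}                      # assumption: the real map covers more fraction characters
for issnum in (u'\xbd', u'1\xbd'):
    x = [vals[key] for key in vals if key in issnum]
    if x:
        chk = re.sub('[^0-9]', '', issnum).strip()
        if len(chk) == 0:
            int_issnum = x[0] * 1000                  # u'\xbd'  -> 500.0
        else:
            int_issnum = (int(chk) + x[0]) * 1000     # u'1\xbd' -> 1500.0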
@@ -1530,7 +1534,7 @@ def IssueDetails(filelocation, IssueID=None, justinfo=False):
cover = "found"
break
elif any(['001.jpg' in infile, '001.png' in infile, '001.webp' in infile, '01.jpg' in infile, '01.png' in infile, '01.webp' in infile]) and cover == "notfound":
elif (any(['001.jpg' in infile, '001.png' in infile, '001.webp' in infile, '01.jpg' in infile, '01.png' in infile, '01.webp' in infile]) or all(['0001' in infile, infile.endswith(pic_extensions)]) or all(['01' in infile, infile.endswith(pic_extensions)])) and cover == "notfound":
logger.fdebug('Extracting primary image ' + infile + ' as coverfile for display.')
local_file = open(os.path.join(mylar.CONFIG.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
@@ -1540,6 +1544,7 @@ def IssueDetails(filelocation, IssueID=None, justinfo=False):
if cover != "found":
logger.fdebug('Invalid naming sequence for jpgs discovered. Attempting to find the lowest sequence and will use as cover (it might not work). Currently : ' + str(low_infile))
local_file = open(os.path.join(mylar.CONFIG.CACHE_DIR, 'temp.jpg'), "wb")
logger.fdebug('infile_name used for displaying: %s' % low_infile_name)
local_file.write(inzipfile.read(low_infile_name))
local_file.close
cover = "found"
@@ -3657,12 +3662,12 @@ def getImage(comicid, url, issueid=None):
#let's make the dir.
try:
os.makedirs(str(mylar.CONFIG.CACHE_DIR))
logger.info('Cache Directory successfully created at: ' + str(mylar.CONFIG.CACHE_DIR))
logger.info('Cache Directory successfully created at: %s' % mylar.CONFIG.CACHE_DIR)
except OSError:
logger.error('Could not create cache dir. Check permissions of cache dir: ' + str(mylar.CONFIG.CACHE_DIR))
logger.error('Could not create cache dir. Check permissions of cache dir: %s' % mylar.CONFIG.CACHE_DIR)
coverfile = os.path.join(mylar.CONFIG.CACHE_DIR, str(comicid) + ".jpg")
coverfile = os.path.join(mylar.CONFIG.CACHE_DIR, str(comicid) + '.jpg')
#if cover has '+' in url it's malformed, we need to replace '+' with '%20' to retrieve properly.
@@ -3675,35 +3680,42 @@ def getImage(comicid, url, issueid=None):
logger.info('Attempting to retrieve the comic image for series')
try:
r = requests.get(url, params=None, stream=True, verify=mylar.CONFIG.CV_VERIFY, headers=mylar.CV_HEADERS)
except Exception, e:
logger.warn('Unable to download image from CV URL link: ' + url + ' [Status Code returned: ' + str(r.status_code) + ']')
logger.fdebug('comic image retrieval status code: ' + str(r.status_code))
if str(r.status_code) != '200':
logger.warn('Unable to download image from CV URL link: ' + url + ' [Status Code returned: ' + str(r.status_code) + ']')
except Exception as e:
logger.warn('[ERROR: %s] Unable to download image from CV URL link: %s' % (e, url))
coversize = 0
statuscode = '400'
else:
if r.headers.get('Content-Encoding') == 'gzip':
buf = StringIO(r.content)
f = gzip.GzipFile(fileobj=buf)
statuscode = str(r.status_code)
logger.fdebug('comic image retrieval status code: %s' % statuscode)
with open(coverfile, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.flush()
statinfo = os.stat(coverfile)
coversize = statinfo.st_size
if int(coversize) < 10000 or str(r.status_code) != '200':
if str(r.status_code) != '200':
logger.info('Trying to grab an alternate cover due to problems trying to retrieve the main cover image.')
if statuscode != '200':
logger.warn('Unable to download image from CV URL link: %s [Status Code returned: %s]' % (url, statuscode))
coversize = 0
else:
logger.info('Image size invalid [' + str(coversize) + ' bytes] - trying to get alternate cover image.')
logger.fdebug('invalid image link is here: ' + url)
if r.headers.get('Content-Encoding') == 'gzip':
buf = StringIO(r.content)
f = gzip.GzipFile(fileobj=buf)
with open(coverfile, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.flush()
statinfo = os.stat(coverfile)
coversize = statinfo.st_size
if any([int(coversize) < 10000, statuscode != '200']):
try:
if statuscode != '200':
logger.info('Trying to grab an alternate cover due to problems trying to retrieve the main cover image.')
else:
logger.info('Image size invalid [%s bytes] - trying to get alternate cover image.' % coversize)
except Exception as e:
logger.info('Image size invalid [%s bytes] - trying to get alternate cover image.' % coversize)
logger.fdebug('invalid image link is here: %s' % url)
if os.path.exists(coverfile):
os.remove(coverfile)

View File

@@ -58,7 +58,7 @@ def addComictoDB(comicid, mismatch=None, pullupd=None, imported=None, ogcname=No
if dbcomic is None:
newValueDict = {"ComicName": "Comic ID: %s" % (comicid),
"Status": "Loading"}
if all([imported, mylar.CONFIG.IMP_PATHS is True]):
if all([imported is not None, mylar.CONFIG.IMP_PATHS is True]):
comlocation = os.path.dirname(imported['filelisting'][0]['comiclocation'])
else:
comlocation = None
@@ -249,7 +249,7 @@ def addComictoDB(comicid, mismatch=None, pullupd=None, imported=None, ogcname=No
covercheck = helpers.getImage(comicid, comic['ComicImageALT'])
#if the comic cover local is checked, save a cover.jpg to the series folder.
if all([mylar.CONFIG.COMIC_COVER_LOCAL is True, os.path.isdir(comlocation) is True, os.path.isfile(PRComicImage) is False]):
if all([mylar.CONFIG.COMIC_COVER_LOCAL is True, os.path.isdir(comlocation) is True, os.path.isfile(os.path.join(comlocation, 'cover.jpg')) is False]):
try:
comiclocal = os.path.join(comlocation, 'cover.jpg')
shutil.copyfile(PRComicImage, comiclocal)
@@ -1130,7 +1130,11 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
elif 'hu' in issnum.lower():
int_issnum = (int(issnum[:-3]) * 1000) + ord('h') + ord('u')
elif u'\xbd' in issnum:
int_issnum = .5 * 1000
tmpiss = re.sub('[^0-9]', '', issnum).strip()
if len(tmpiss) > 0:
int_issnum = (int(tmpiss) + .5) * 1000
else:
int_issnum = .5 * 1000
logger.fdebug('1/2 issue detected :' + issnum + ' === ' + str(int_issnum))
elif u'\xbc' in issnum:
int_issnum = .25 * 1000
@@ -1599,7 +1603,7 @@ def image_it(comicid, latestissueid, comlocation, ComicImage):
ComicImage = helpers.replacetheslash(PRComicImage)
#if the comic cover local is checked, save a cover.jpg to the series folder.
if all([mylar.CONFIG.COMIC_COVER_LOCAL is True, os.path.isdir(comlocation) is True, os.path.isfile(PRComicImage)]):
if all([mylar.CONFIG.COMIC_COVER_LOCAL is True, os.path.isdir(comlocation) is True, os.path.isfile(os.path.join(comlocation, 'cover.jpg'))]):
try:
comiclocal = os.path.join(comlocation, 'cover.jpg')
shutil.copyfile(PRComicImage, comiclocal)

View File

@@ -2323,7 +2323,11 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
sent_to = None
t_hash = None
if mylar.CONFIG.ENABLE_DDL is True and nzbprov == 'ddl':
ggc = getcomics.GC(issueid=IssueID, comicid=ComicID)
if all([IssueID is None, IssueArcID is not None]):
tmp_issueid = IssueArcID
else:
tmp_issueid = IssueID
ggc = getcomics.GC(issueid=tmp_issueid, comicid=ComicID)
sendsite = ggc.loadsite(nzbid, link)
ddl_it = ggc.parse_downloadresults(nzbid, link)
if ddl_it['success'] is True:

View File

@@ -1642,7 +1642,7 @@ def totals(ComicID, havefiles=None, totalfiles=None, module=None, issueid=None,
if totalfiles == 1:
havefiles = 1
else:
logger.warn('Total issues for this series [ComiciD:%s/IssueID:%] is not 1 when it should be. This is probably a mistake and the series should be refreshed.' % (ComicID, IssueID))
logger.warn('Total issues for this series [ComicID:%s/IssueID:%s] is not 1 when it should be. This is probably a mistake and the series should be refreshed.' % (ComicID, issueid))
havefiles = 0
logger.fdebug('incremented havefiles: %s' % havefiles)
else: