FIX:(#2164) When adding a TPB series with collected information in the description, bulleted items in the description would in some cases cause an error, FIX: When searching for TPBs, the log message would incorrectly reference a portion of the publication date as the issue #, FIX: When searching for TPBs, make sure to only remove the issue number for relevant search queries, FIX: Adjust the parser to use regular hyphens instead of the unicode dash variants in all usable instances

This commit is contained in:
evilhero 2019-01-09 11:12:00 -05:00
parent a09ac48951
commit 625a36abe7
3 changed files with 30 additions and 5 deletions

View File

@ -285,6 +285,7 @@ def GetComicInfo(comicid, dom, safechk=None):
desdeck = 0
#the description field actually holds the Volume# - so let's grab it
desc_soup = None
try:
descchunk = dom.getElementsByTagName('description')[0].firstChild.wholeText
desc_soup = Soup(descchunk, "html.parser")
@ -368,12 +369,21 @@ def GetComicInfo(comicid, dom, safechk=None):
#logger.info('comic_desc: %s' % comic_desc)
#logger.info('desclinks: %s' % desclinks)
issue_list = []
micdrop = []
if desc_soup is not None:
#if it's point form bullets, ignore it cause it's not the current volume stuff.
test_it = desc_soup.find('ul')
if test_it:
for x in test_it.findAll('a'):
micdrop.append(x['data-ref-id'])
for fc in desclinks:
#logger.info('fc: %s' % fc)
fc_id = fc['data-ref-id']
#logger.info('fc_id: %s' % fc_id)
if fc_id in micdrop:
continue
fc_name = fc.findNext(text=True)
#logger.info('fc_name: %s' % fc_name)
if fc_id.startswith('4000'):
fc_cid = None
fc_isid = fc_id

View File

@ -49,6 +49,7 @@ class FileChecker(object):
self.og_watchcomic = watchcomic
self.watchcomic = re.sub('\?', '', watchcomic).strip() #strip the ? sepearte since it affects the regex.
self.watchcomic = re.sub(u'\u2014', ' - ', watchcomic).strip() #replace the \u2014 with a normal - because this world is f'd up enough to have something like that.
self.watchcomic = re.sub(u'\u2013', ' - ', watchcomic).strip() #replace the \u2013 with a normal - because again, people are dumb.
self.watchcomic = unicodedata.normalize('NFKD', self.watchcomic).encode('ASCII', 'ignore')
else:
self.watchcomic = None
@ -97,7 +98,7 @@ class FileChecker(object):
self.pp_mode = False
self.failed_files = []
self.dynamic_handlers = ['/','-',':','\'',',','&','?','!','+','(',')','\u2014']
self.dynamic_handlers = ['/','-',':','\'',',','&','?','!','+','(',')','\u2014','\u2013']
self.dynamic_replacements = ['and','the']
self.rippers = ['-empire','-empire-hd','minutemen-','-dcp']
@ -1355,6 +1356,7 @@ class FileChecker(object):
mod_watchcomic = mod_watchcomic[:wd] + spacer + mod_watchcomic[wd+len(wdrm):]
series_name = re.sub(u'\u2014', ' - ', series_name)
series_name = re.sub(u'\u2013', ' - ', series_name)
seriesdynamic_handlers_match = [x for x in self.dynamic_handlers if x.lower() in series_name.lower()]
#logger.fdebug('series dynamic handlers recognized : ' + str(seriesdynamic_handlers_match))
seriesdynamic_replacements_match = [x for x in self.dynamic_replacements if x.lower() in series_name.lower()]

View File

@ -379,8 +379,14 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
if IssueNumber is not None:
issuedisplay = IssueNumber
else:
issuedisplay = StoreDate[5:]
logger.info('Could not find Issue %s of %s (%s) using %s [%s]' % (issuedisplay, ComicName, SeriesYear, searchprov, searchmode))
if any([booktype == 'One-Shot', booktype == 'TPB']):
issuedisplay = None
else:
issuedisplay = StoreDate[5:]
if issuedisplay is None:
logger.info('Could not find %s (%s) using %s [%s]' % (ComicName, SeriesYear, searchprov, searchmode))
else:
logger.info('Could not find Issue %s of %s (%s) using %s [%s]' % (issuedisplay, ComicName, SeriesYear, searchprov, searchmode))
prov_count+=1
if findit['status'] is True:
@ -1345,8 +1351,15 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
logger.fdebug("integer value of issue we have found : %s" % comintIss)
else:
comintIss = 11111111111
#do this so that we don't touch the actual value but just use it for comparisons
if parsed_comic['issue_number'] is None:
pc_in = None
else:
pc_in = int(parsed_comic['issue_number'])
#issue comparison now as well
if int(intIss) == int(comintIss) or all([cmloopit == 4, findcomiciss is None, parsed_comic['issue_number'] is None]):
if int(intIss) == int(comintIss) or all([cmloopit == 4, findcomiciss is None, pc_in is None]) or all([cmloopit == 4, findcomiciss is None, pc_in == 1]):
nowrite = False
if all([nzbprov == 'torznab', 'worldwidetorrents' in entry['link']]):
nzbid = generate_id(nzbprov, entry['id'])