FIX:(#817) If a series contained an issue that was a lone alpha (i.e. issue X), it would fail everywhere. Adjusted to allow for adding / searching / filechecking. FIX: Unicode issue numbers that are fractions, for GUI display. FIX: Fraction issue numbers (1/2, 3/4, etc.) would round up for filechecking purposes and would cause problems when the series contained more than one identical issue number.

This commit is contained in:
evilhero 2014-09-08 04:35:15 -04:00
parent 2173614836
commit 4e46e91b8b
5 changed files with 141 additions and 50 deletions

View File

@ -556,7 +556,10 @@ class PostProcessor(object):
logger.fdebug(module + ' Zero Suppression set to : ' + str(mylar.ZERO_LEVEL_N))
if str(len(issueno)) > 1:
if int(issueno) < 0:
if issueno.isalpha():
self._log('issue detected as an alpha.')
prettycomiss = str(issueno)
elif int(issueno) < 0:
self._log("issue detected is a negative")
prettycomiss = '-' + str(zeroadd) + str(abs(issueno))
elif int(issueno) < 10:

View File

@ -66,7 +66,8 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non
'AI',
'A',
'B',
'C']
'C',
'X']
extensions = ('.cbr', '.cbz')
@ -256,8 +257,9 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic) #remove spec chars for watchcomic match.
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
else:
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', watchcomic) #remove spec chars for watchcomic match.
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', subthis)
# in order to get series like Earth 2 scanned in that contain a decimal, I removed the \. from the re.subs below - 28-08-2014
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic) #remove spec chars for watchcomic match.
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname)
subthis = re.sub('\s+',' ', subthis)
logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis)

View File

@ -171,7 +171,7 @@ def human2bytes(s):
def replace_all(text, dic):
    """Apply every ``old -> new`` substitution in *dic* to *text*.

    Each key of *dic* is replaced by its value using ``str.replace``, in the
    mapping's iteration order, and trailing whitespace is stripped from the
    final string.

    :param text: the string to transform.
    :param dic: mapping of substring -> replacement.
    :returns: the transformed string without trailing whitespace.
    """
    # .items() yields the same pairs as .iteritems() and exists on both
    # Python 2 and Python 3 mappings.
    for old, new in dic.items():
        text = text.replace(old, new)
    # Single exit point: a bare ``return text`` ahead of this line would make
    # the rstrip() unreachable.
    return text.rstrip()
def cleanName(string):
@ -734,7 +734,7 @@ def cleanhtml(raw_html):
def issuedigits(issnum):
import db, logger
#print "issnum : " + str(issnum)
if str(issnum).isdigit():
if issnum.isdigit():
int_issnum = int( issnum ) * 1000
else:
#count = 0
@ -767,14 +767,11 @@ def issuedigits(issnum):
else:
int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w')
elif u'\xbd' in issnum:
issnum = .5
int_issnum = int(issnum) * 1000
int_issnum = .5 * 1000
elif u'\xbc' in issnum:
issnum = .25
int_issnum = int(issnum) * 1000
int_issnum = .25 * 1000
elif u'\xbe' in issnum:
issnum = .75
int_issnum = int(issnum) * 1000
int_issnum = .75 * 1000
elif u'\u221e' in issnum:
#issnum = utf-8 will encode the infinity symbol without any help
int_issnum = 9999999999 * 1000 # set 9999999999 for integer value of issue
@ -820,6 +817,8 @@ def issuedigits(issnum):
try:
isschk = float(issno)
except ValueError, e:
if len(issnum) == 1 and issnum.isalpha():
break
logger.fdebug('invalid numeric for issue - cannot be found. Ignoring.')
issno = None
tstord = None
@ -827,18 +826,18 @@ def issuedigits(issnum):
break
x+=1
if tstord is not None and issno is not None:
logger.fdebug('tstord: ' + str(tstord))
a = 0
ordtot = 0
while (a < len(tstord)):
try:
ordtot += ord(tstord[a].lower()) #lower-case the letters for simplicty
except ValueError:
break
a+=1
logger.fdebug('issno: ' + str(issno))
int_issnum = (int(issno) * 1000) + ordtot
logger.fdebug('intissnum : ' + str(int_issnum))
if len(issnum) == 1 and issnum.isalpha():
int_issnum = ord(tstord.lower())
else:
while (a < len(tstord)):
try:
ordtot += ord(tstord[a].lower()) #lower-case the letters for simplicty
except ValueError:
break
a+=1
int_issnum = (int(issno) * 1000) + ordtot
elif invchk == "true":
logger.fdebug('this does not have an issue # that I can parse properly.')
int_issnum = 999999999999999
@ -1395,6 +1394,14 @@ def IssueDetails(filelocation, IssueID=None):
return issuedetails
def get_issue_title(IssueID):
    """Return the ``IssueName`` stored for *IssueID*, or None if not found.

    Selects the row of the ``issues`` table whose ``IssueID`` matches the
    given value.  When no row is returned a warning is logged and None is
    returned instead.

    :param IssueID: value matched against the ``IssueID`` column.
    :returns: the row's ``IssueName`` value, or None when no row matches.
    """
    # logger is imported locally alongside db (as issuedigits() does) so the
    # not-found branch does not rely on a module-level ``logger`` name.
    import db, logger
    myDB = db.DBConnection()
    issue = myDB.selectone('SELECT * FROM issues WHERE IssueID=?', [IssueID]).fetchone()
    if issue is None:
        logger.warn('Unable to locate given IssueID within the db.')
        return None
    return issue['IssueName']
from threading import Thread

View File

@ -1110,14 +1110,12 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
elif 'now' in issnum.lower():
int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w')
elif u'\xbd' in issnum:
issnum = .5
int_issnum = int(issnum) * 1000
int_issnum = .5 * 1000
logger.info('1/2 issue detected :' + issnum + ' === ' + str(int_issnum))
elif u'\xbc' in issnum:
issnum = .25
int_issnum = int(issnum) * 1000
int_issnum = .25 * 1000
elif u'\xbe' in issnum:
issnum = .75
int_issnum = int(issnum) * 1000
int_issnum = .75 * 1000
elif u'\u221e' in issnum:
#issnum = utf-8 will encode the infinity symbol without any help
int_issnum = 9999999999 * 1000 # set 9999999999 for integer value of issue
@ -1167,10 +1165,13 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
if issnum[x].isalpha():
#take first occurrence of alpha in string and carry it through
tstord = issnum[x:].rstrip()
issno = issnum[:x].rstrip()
issno = issnum[:x+1].rstrip()
try:
isschk = float(issno)
except ValueError, e:
if len(issnum) == 1 and issnum.isalpha():
logger.fdebug('detected lone alpha issue. Attempting to figure this out.')
break
logger.fdebug('invalid numeric for issue - cannot be found. Ignoring.')
issno = None
tstord = None
@ -1178,15 +1179,15 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
break
x+=1
if tstord is not None and issno is not None:
logger.fdebug('tstord: ' + str(tstord))
a = 0
ordtot = 0
while (a < len(tstord)):
ordtot += ord(tstord[a].lower()) #lower-case the letters for simplicty
a+=1
logger.fdebug('issno: ' + str(issno))
int_issnum = (int(issno) * 1000) + ordtot
logger.fdebug('intissnum : ' + str(int_issnum))
if len(issno) == 1 and issnum.isalpha():
int_issnum = ord(tstord.lower())
else:
while (a < len(tstord)):
ordtot += ord(tstord[a].lower()) #lower-case the letters for simplicty
a+=1
int_issnum = (int(issno) * 1000) + ordtot
elif invchk == "true":
logger.fdebug('this does not have an issue # that I can parse properly.')
return

View File

@ -42,6 +42,10 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
else: ComicYear = str(ComicYear)[:4]
if Publisher == 'IDW Publishing': Publisher = 'IDW'
logger.fdebug('Publisher is : ' + str(Publisher))
issuetitle = helpers.get_issue_title(IssueID)
logger.info('Issue Title given as : ' + str(issuetitle))
if mode == 'want_ann':
logger.info("Annual issue search detected. Appending to issue #")
#anything for mode other than None indicates an annual.
@ -191,7 +195,7 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
searchprov = prov_order[prov_count].lower()
if searchmode == 'rss':
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID)
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle)
if findit == 'yes':
logger.fdebug("findit = found!")
break
@ -204,13 +208,13 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
for calt in chkthealt:
AS_Alternate = re.sub('##','',calt)
logger.info(u"Alternate Search pattern detected...re-adjusting to : " + str(AS_Alternate) + " " + str(ComicYear))
findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID)
findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle)
if findit == 'yes':
break
if findit == 'yes': break
else:
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID)
findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID, issuetitle=issuetitle)
if findit == 'yes':
logger.fdebug("findit = found!")
break
@ -223,14 +227,14 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
for calt in chkthealt:
AS_Alternate = re.sub('##','',calt)
logger.info(u"Alternate Search pattern detected...re-adjusting to : " + str(AS_Alternate) + " " + str(ComicYear))
findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID)
findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID, issuetitle=issuetitle)
if findit == 'yes':
break
if findit == 'yes': break
if searchprov == 'newznab':
searchprov = newznab_host[0].rstrip()
logger.info('Could not find Issue ' + str(IssueNumber) + ' of ' + ComicName + '(' + str(SeriesYear) + ') using ' + str(searchprov))
logger.info('Could not find Issue ' + IssueNumber + ' of ' + ComicName + '(' + str(SeriesYear) + ') using ' + str(searchprov))
prov_count+=1
#torprtmp+=1 #torprtmp-=1
@ -245,7 +249,7 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
return findit, 'None'
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, nzbprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host=None, ComicVersion=None, SARC=None, IssueArcID=None, RSS=None, ComicID=None):
def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, nzbprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host=None, ComicVersion=None, SARC=None, IssueArcID=None, RSS=None, ComicID=None, issuetitle=None):
if nzbprov == 'nzb.su':
apikey = mylar.NZBSU_APIKEY
@ -276,7 +280,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
tmpprov = name_newznab + ' (' + nzbprov + ')'
else:
tmpprov = nzbprov
logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + " (" + str(ComicYear) + ") using " + str(tmpprov))
logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + IssueNumber + " (" + str(ComicYear) + ") using " + str(tmpprov))
#load in do not download db here for given series
#myDB = db.DBConnection()
@ -322,7 +326,17 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
#NEW ---
intIss = helpers.issuedigits(IssueNumber)
iss = IssueNumber
findcomiciss = iss
if u'\xbd' in IssueNumber:
findcomiciss = '0.5'
elif u'\xbc' in IssueNumber:
findcomiciss = '0.25'
elif u'\xbe' in IssueNumber:
findcomiciss = '0.75'
elif u'\u221e' in IssueNumber:
#issnum = utf-8 will encode the infinity symbol without any help
findcomiciss = 'infinity' # set 9999999999 for integer value of issue
else:
findcomiciss = iss
#print ("we need : " + str(findcomic[findcount]) + " issue: #" + str(findcomiciss[findcount]))
cm1 = re.sub("[\/]", " ", findcomic)
@ -835,6 +849,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
comic_iss = comic_iss_b4.replace('.',' ')
#if issue_except: comic_iss = re.sub(issue_except.lower(), '', comic_iss)
logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss))
splitit = comic_iss.split(None)
#something happened to dognzb searches or results...added a '.' in place of spaces
#screwed up most search results with dognzb. Let's try to adjust.
@ -877,7 +892,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
if tmpiss[i].isalpha():
#take first occurrence of alpha in string and carry it through
alphas = tmpiss[i:].rstrip()
a_issno = tmpiss[:i].rstrip()
a_issno = tmpiss[:i+1].rstrip()
break
i+=1
logger.fdebug("alphas: " + str(alphas))
@ -1000,9 +1015,61 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
initialchk = 'ok'
if (splitst) != len(watchcomic_split):
logger.fdebug("incorrect comic lengths...not a match")
#because the word 'the' can appear anywhere and really mess up matches...
# if str(splitit[0]).lower() == "the" or str(watchcomic_split[0]).lower() == "the":
# if str(splitit[0]).lower() == "the":
issuetitle = re.sub('[\-\:\,\?\.]', ' ', str(issuetitle))
issuetitle_words = issuetitle.split(None)
#issue title comparison here:
logger.fdebug('there are ' + str(len(issuetitle_words)) + ' words in the issue title of : ' + str(issuetitle))
# we minus 1 the splitst since the issue # is included in there.
if (splitst - 1) > len(watchcomic_split):
extra_words = splitst - len(watchcomic_split)
logger.fdebug('there are ' + str(extra_words) + ' left over after we remove the series title.')
wordcount = 1
#remove the series title here so we just have the 'hopefully' issue title
for word in splitit:
#logger.info('word: ' + str(word))
if wordcount > len(watchcomic_split):
#logger.info('wordcount: ' + str(wordcount))
#logger.info('watchcomic_split: ' + str(len(watchcomic_split)))
if wordcount - len(watchcomic_split) == 1:
search_issue_title = word
else:
search_issue_title += ' ' + word
wordcount +=1
logger.fdebug('search_issue_title is : ' + str(search_issue_title))
#now we have the nzb issue title (if it exists), let's break it down further.
sit_split = search_issue_title.split(None)
watch_split_count = len(issuetitle_words)
wsplit = 0
isstitle_match = 0 #counter to tally % match
misword = 0 # counter to tally words that probably don't need to be an 'exact' match for
for sit in sit_split:
if sit.lower() == issuetitle_words[wsplit].lower():
logger.fdebug('word match: ' + str(sit))
isstitle_match +=1
else:
if sit.lower() == 'part':
#logger.fdebug('not worrying about this word : ' + str(sit))
misword +=1
if sit.isdigit():
#logger.fdebug('found digit - possible mini-series/arc subset.')
if sit in issuetitle:
logger.fdebug('found matching numeric in issuetitle.')
isstitle_match +=1
logger.fdebug('isstitle_match count : ' + str(isstitle_match))
if isstitle_match > 0:
iss_calc = int( watch_split_count / isstitle_match )
logger.fdebug('iss_calc: ' + str(iss_calc) + ' %')
else:
iss_calc = 0
logger.fdebug('0 words matched on issue title.')
if int(iss_calc) > 80:
logger.fdebug('>80% match on issue name. If this were implemented, this would be considered a match.')
else:
pass
for tstsplit in splitit:
if tstsplit.lower() == 'the':
logger.fdebug("THE word detected in found comic...attempting to adjust pattern matching")
@ -1014,6 +1081,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
splitst = splitst - 1 #remove 'the' from start
logger.fdebug("comic is now : " + str(splitit))#str(comic_iss[4:]))
#if str(watchcomic_split[0]).lower() == "the":
for tstsplit in watchcomic_split:
if tstsplit.lower() == 'the':
logger.fdebug("THE word detected in watchcomic - attempting to adjust match.")
@ -1362,7 +1430,17 @@ def nzbname_create(provider, title=None, info=None):
#pretty this biatch up.
BComicName = re.sub('[\:\,\/\?]', '', str(ComicName))
Bl_ComicName = re.sub('[\&]', 'and', str(BComicName))
nzbname = str(re.sub(" ", ".", str(Bl_ComicName))) + "." + str(IssueNumber) + ".(" + str(comyear) + ")"
if u'\xbd' in issnum:
str_IssueNumber = '0.5'
elif u'\xbc' in issnum:
str_IssueNumber = '0.25'
elif u'\xbe' in issnum:
str_IssueNumber = '0.75'
elif u'\u221e' in issnum:
str_IssueNumber = 'infinity'
else:
str_IssueNumber = IssueNumber
nzbname = str(re.sub(" ", ".", str(Bl_ComicName))) + "." + str(str_IssueNumber) + ".(" + str(comyear) + ")"
logger.fdebug("nzb name to be used for post-processing is : " + str(nzbname))
@ -1468,7 +1546,7 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
logger.fdebug('[FAILED_DOWNLOAD_CHECKER] This is not in the failed downloads list. Will continue with the download.')
logger.fdebug('issues match!')
logger.info(u"Found " + ComicName + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(tmpprov) )
logger.info(u"Found " + ComicName + " (" + str(comyear) + ") issue: " + IssueNumber + " using " + str(tmpprov) )
linkstart = os.path.splitext(link)[0]
if nzbprov == 'nzb.su' or nzbprov == 'newznab':