From 49dcba816187f487a605bb028209d07c8af589d6 Mon Sep 17 00:00:00 2001 From: evilhero Date: Thu, 14 Feb 2013 13:56:55 -0500 Subject: [PATCH] FIX: When searching, if & present in nzb it would only use information after the &, FIX: PostProcessing would fail if & present in title, FIX: adding a series would fail occasionally/or lock when trying to load in comic information --- data/interfaces/default/config.html | 2 +- mylar/PostProcessor.py | 3 +- mylar/librarysync.py | 199 ++++++++++++++-------------- mylar/parseit.py | 4 +- mylar/search.py | 9 +- mylar/webserve.py | 8 +- 6 files changed, 122 insertions(+), 103 deletions(-) diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index e1fad890..273a0cc6 100755 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -361,7 +361,7 @@
Post-Processing
- +
diff --git a/mylar/PostProcessor.py b/mylar/PostProcessor.py index 2cda6852..3e5a8fa6 100755 --- a/mylar/PostProcessor.py +++ b/mylar/PostProcessor.py @@ -172,6 +172,7 @@ class PostProcessor(object): #replace spaces nzbname = re.sub(' ', '.', str(nzbname)) nzbname = re.sub('[\,\:]', '', str(nzbname)) + nzbname = re.sub('[\&]', 'and', str(nzbname)) logger.fdebug("After conversions, nzbname is : " + str(nzbname)) self._log("nzbname: " + str(nzbname), logger.DEBUG) @@ -323,7 +324,7 @@ class PostProcessor(object): logger.fdebug("Original Filname: " + str(ofilename)) logger.fdebug("Original Extension: " + str(ext)) - if mylar.FILE_FORMAT == '': + if mylar.FILE_FORMAT == '' or not mylar.RENAME_FILES: self._log("Rename Files isn't enabled...keeping original filename.", logger.DEBUG) logger.fdebug("Rename Files isn't enabled - keeping original filename.") #check if extension is in nzb_name - will screw up otherwise diff --git a/mylar/librarysync.py b/mylar/librarysync.py index 189975ea..00c7e3c8 100755 --- a/mylar/librarysync.py +++ b/mylar/librarysync.py @@ -135,7 +135,7 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) comlocation = i['ComicLocation'] #let's clean up the filename for matching purposes - cfilename = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(comfilename)) + cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', str(comfilename)) #cfilename = re.sub('\s', '_', str(cfilename)) cm_cn = 0 @@ -173,6 +173,7 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) cn = '' ydetected = 'no' idetected = 'no' + decimaldetect = 'no' for i in reversed(xrange(len(cs))): #start at the end. print ("word: " + str(cs[i])) @@ -180,56 +181,54 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) print ("year detected: " + str(cs[i])) ydetected = 'yes' result_comyear = cs[i] - elif '\#' in cs[i]: - print ("issue detected: " + str(cs[i])) - idetected = 'yes' - elif cs[i].isdigit() and idetected == 'no': + elif cs[i].isdigit() and idetected == 'no' or '.' in cs[i]: + issue = cs[i] + print ("issue detected : " + str(issue)) + idetected = 'yes' + if '.' in cs[i]: + #make sure it's a number on either side of decimal and assume decimal issue. + decst = cs[i].find('.') + dec_st = cs[i][:decst] + dec_en = cs[i][decst+1:] + print ("st: " + str(dec_st)) + print ("en: " + str(dec_en)) + if dec_st.isdigit() and dec_en.isdigit(): + print ("decimal issue detected...adjusting.") + issue = dec_st + "." + dec_en + print ("issue detected: " + str(issue)) + idetected = 'yes' + else: + print ("false decimal represent. Chunking to extra word.") + cn = cn + cs[i] + " " + break + elif '\#' in cs[i] or decimaldetect == 'yes': print ("issue detected: " + str(cs[i])) idetected = 'yes' + else: cn = cn + cs[i] + " " if ydetected == 'no': #assume no year given in filename... result_comyear = "0000" print ("cm?: " + str(cn)) + comiss = issue cnsplit = cn.split() cname = '' findcn = 0 while (findcn < len(cnsplit)): cname = cname + cs[findcn] + " " findcn+=1 + cname = cname[:len(cname)-1] # drop the end space... print ("assuming name is : " + str(cname)) + com_NAME = cname + print ("com_NAME : " + str(com_NAME)) yearmatch = "True" - if m[cnt][:-2] == '19' or m[cnt][:-2] == '20': - logger.fdebug("year detected: " + str(m[cnt])) - result_comyear = m[cnt] - yearmatch = "true" - # if str(comyear) in result_comyear: - # logger.fdebug(str(comyear) + " - right years match baby!") - # yearmatch = "true" - # else: - # logger.fdebug(str(comyear) + " - not right - years do not match") - # yearmatch = "false" - #let's do this hear and save a few extra loops ;) - if 'digital' in m[cnt] and len(m[cnt]) == 7: - logger.fdebug("digital edition detected") - pass - if ' of ' in m[cnt]: - logger.fdebug("mini-series detected : " + str(m[cnt])) - result_of = m[cnt] - if 'cover' in m[cnt]: - logger.fdebug("covers detected: " + str(m[cnt])) - result_comcovers = m[cnt] - for ripper in ripperlist: - if ripper in m[cnt]: - logger.fdebug("Scanner detected: " + str(m[cnt])) - result_comscanner = m[cnt] cnt+=1 splitit = [] watchcomic_split = [] logger.fdebug("filename comic and issue: " + str(cfilename)) #changed this from '' to ' ' - comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(comic_andiss)) + comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(com_NAME)) comic_iss = comic_iss_b4.replace('.',' ') logger.fdebug("adjusted comic and issue: " + str(comic_iss)) #remove 'the' from here for proper comparisons. @@ -252,31 +251,34 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) comicid = ComicID[cm_cn] watch_location = ComicLocation[cm_cn] - if splitit[(len(splitit)-1)].isdigit(): - #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal - comic_iss = splitit[(len(splitit)-1)] - splitst = len(splitit) - 1 - if splitit[(len(splitit)-2)].isdigit(): - # for series that have a digit at the end, it screws up the logistics. - i = 1 - chg_comic = splitit[0] - while (i < (len(splitit)-1)): - chg_comic = chg_comic + " " + splitit[i] - i+=1 - logger.fdebug("chg_comic:" + str(chg_comic)) - if chg_comic.upper() == comname.upper(): - logger.fdebug("series contains numerics...adjusting..") - else: - changeup = "." + splitit[(len(splitit)-1)] - logger.fdebug("changeup to decimal: " + str(changeup)) - comic_iss = splitit[(len(splitit)-2)] + "." + comic_iss - splitst = len(splitit) - 2 - else: +# if splitit[(len(splitit)-1)].isdigit(): +# #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal +# comic_iss = splitit[(len(splitit)-1)] +# splitst = len(splitit) - 1 +# if splitit[(len(splitit)-2)].isdigit(): +# # for series that have a digit at the end, it screws up the logistics. +# i = 1 +# chg_comic = splitit[0] +# while (i < (len(splitit)-1)): +# chg_comic = chg_comic + " " + splitit[i] +# i+=1 +# logger.fdebug("chg_comic:" + str(chg_comic)) +# if chg_comic.upper() == comname.upper(): +# logger.fdebug("series contains numerics...adjusting..") +# else: +# changeup = "." + splitit[(len(splitit)-1)] +# logger.fdebug("changeup to decimal: " + str(changeup)) +# comic_iss = splitit[(len(splitit)-2)] + "." + comic_iss +# splitst = len(splitit) - 2 +# else: # if the nzb name doesn't follow the series-issue-year format even closely..ignore nzb - logger.fdebug("invalid naming format of filename detected - cannot properly determine issue") - continue +# logger.fdebug("invalid naming format of filename detected - cannot properly determine issue") +# continue # make sure that things like - in watchcomic are accounted for when comparing to nzb. + + # there shouldn't be an issue in the comic now, so let's just assume it's all gravy. + splitst = len(splitit) watchcomic_split = helpers.cleanName(str(comname)) watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None) @@ -316,13 +318,13 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) if splitit[n].isdigit(): logger.fdebug("issue detected") #comiss = splitit[n] - comicNAMER = n - 1 - com_NAME = splitit[0] - cmnam = 1 - while (cmnam <= comicNAMER): - com_NAME = str(com_NAME) + " " + str(splitit[cmnam]) - cmnam+=1 - logger.fdebug("comic: " + str(com_NAME)) +# comicNAMER = n - 1 +# com_NAME = splitit[0] +# cmnam = 1 +# while (cmnam <= comicNAMER): +# com_NAME = str(com_NAME) + " " + str(splitit[cmnam]) +# cmnam+=1 +# logger.fdebug("comic: " + str(com_NAME)) else: logger.fdebug("non-match for: "+ str(splitit[n])) pass @@ -339,31 +341,32 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) if int(spercent) >= 80: logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!") logger.fdebug("this should be a match!") - if '.' in comic_iss: - comisschk_find = comic_iss.find('.') - comisschk_b4dec = comic_iss[:comisschk_find] - comisschk_decval = comic_iss[comisschk_find+1:] - logger.fdebug("Found IssueNumber: " + str(comic_iss)) - logger.fdebug("..before decimal: " + str(comisschk_b4dec)) - logger.fdebug("...after decimal: " + str(comisschk_decval)) - #--let's make sure we don't wipe out decimal issues ;) - if int(comisschk_decval) == 0: - ciss = comisschk_b4dec - cintdec = int(comisschk_decval) - else: - if len(comisschk_decval) == 1: - ciss = comisschk_b4dec + "." + comisschk_decval - cintdec = int(comisschk_decval) * 10 - else: - ciss = comisschk_b4dec + "." + comisschk_decval.rstrip('0') - cintdec = int(comisschk_decval.rstrip('0')) * 10 - comintIss = (int(comisschk_b4dec) * 1000) + cintdec - else: - comintIss = int(comic_iss) * 1000 - logger.fdebug("issue we found for is : " + str(comic_iss)) - +# if '.' in comic_iss: +# comisschk_find = comic_iss.find('.') +# comisschk_b4dec = comic_iss[:comisschk_find] +# comisschk_decval = comic_iss[comisschk_find+1:] +# logger.fdebug("Found IssueNumber: " + str(comic_iss)) +# logger.fdebug("..before decimal: " + str(comisschk_b4dec)) +# logger.fdebug("...after decimal: " + str(comisschk_decval)) +# #--let's make sure we don't wipe out decimal issues ;) +# if int(comisschk_decval) == 0: +# ciss = comisschk_b4dec +# cintdec = int(comisschk_decval) +# else: +# if len(comisschk_decval) == 1: +# ciss = comisschk_b4dec + "." + comisschk_decval +# cintdec = int(comisschk_decval) * 10 +# else: +# ciss = comisschk_b4dec + "." + comisschk_decval.rstrip('0') +# cintdec = int(comisschk_decval.rstrip('0')) * 10 +# comintIss = (int(comisschk_b4dec) * 1000) + cintdec +# else: +# comintIss = int(comic_iss) * 1000 + logger.fdebug("issue we found for is : " + str(comiss)) + #set the year to the series we just found ;) + result_comyear = comyear #issue comparison now as well - logger.info(u"Found " + str(comname) + " (" + str(comyear) + ") issue: " + str(comic_iss)) + logger.info(u"Found " + str(comname) + " (" + str(comyear) + ") issue: " + str(comiss)) # watchfound+=1 watchmatch = str(comicid) # watch_kchoice.append({ @@ -385,20 +388,22 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None) watchmatch = None #---if it's not a match - send it to the importer. n = 0 - csplit = comic_andiss.split(None) - while ( n <= (len(csplit)-1) ): - if csplit[n].isdigit(): - logger.fdebug("issue detected") - comiss = splitit[n] - logger.fdebug("issue # : " + str(comiss)) - comicNAMER = n - 1 - com_NAME = csplit[0] - cmnam = 1 - while (cmnam <= comicNAMER): - com_NAME = str(com_NAME) + " " + str(csplit[cmnam]) - cmnam+=1 - logger.fdebug("comic: " + str(com_NAME)) - n+=1 +# print ("comic_andiss : " + str(comic_andiss)) +# csplit = comic_andiss.split(None) +# while ( n <= (len(csplit)-1) ): +# print ("csplit:" + str(csplit[n])) +# if csplit[n].isdigit(): +# logger.fdebug("issue detected") +# comiss = splitit[n] +# logger.fdebug("issue # : " + str(comiss)) +# comicNAMER = n - 1 +# com_NAME = csplit[0] +# cmnam = 1 +# while (cmnam <= comicNAMER): +# com_NAME = str(com_NAME) + " " + str(csplit[cmnam]) +# cmnam+=1 +# logger.fdebug("comic: " + str(com_NAME)) +# n+=1 if result_comyear is None: result_comyear = '0000' #no year in filename basically. print ("adding " + str(com_NAME) + " to the import-queue!") impid = str(com_NAME) + "-" + str(result_comyear) + "-" + str(comiss) diff --git a/mylar/parseit.py b/mylar/parseit.py index fc1bd8fe..a5e4da56 100755 --- a/mylar/parseit.py +++ b/mylar/parseit.py @@ -416,7 +416,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati def GettheDate(parsed,PrevYRMO): #--- let's use pubdate. #try publicationd date first - logger.fdebug("parsed:" + str(parsed)) + #logger.fdebug("parsed:" + str(parsed)) subtxt1 = parsed('td')[1] ParseDate = subtxt1.findNext(text=True) basmonths = {'january':'01','february':'02','march':'03','april':'04','may':'05','june':'06','july':'07','august':'08','september':'09','october':'10','november':'11','december':'12'} @@ -435,6 +435,7 @@ def GettheDate(parsed,PrevYRMO): logger.fdebug("!success - Publication date: " + str(ParseDate)) break # some comics are messed with pub.dates and have Spring/Summer/Fall/Winter + else: baseseasons = {'spring':'03','summer':'06','fall':'09','winter':'12'} for seas in baseseasons: if seas in ParseDate.lower(): @@ -442,7 +443,6 @@ def GettheDate(parsed,PrevYRMO): ParseYear = re.sub('/s','',ParseDate[-5:]) ParseDate = str(ParseYear) + "-" + str(sconv) break - else: # #try key date # subtxt1 = parsed('td')[2] # ParseDate = subtxt1.findNext(text=True) diff --git a/mylar/search.py b/mylar/search.py index 9ba13735..97d0a115 100755 --- a/mylar/search.py +++ b/mylar/search.py @@ -281,7 +281,9 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is #print ("we need : " + str(findcomic[findcount]) + " issue: #" + str(findcomiciss[findcount])) # replace whitespace in comic name with %20 for api search cm1 = re.sub(" ", "%20", str(findcomic[findcount])) - cm = re.sub("\&", "%26", str(cm1)) + #cm = re.sub("\&", "%26", str(cm1)) + cm = re.sub("and", "", str(cm1)) # remove 'and' & '&' from the search pattern entirely (broader results, will filter out later) + cm = re.sub("\&", "", str(cm)) #print (cmi) if '.' in findcomiciss[findcount]: if len(str(isschk_b4dec)) == 3: @@ -362,6 +364,10 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is logger.fdebug("Entry: " + str(thisentry)) cleantitle = re.sub('[_/.]', ' ', str(entry['title'])) cleantitle = helpers.cleanName(str(cleantitle)) + # this is new - if title contains a '&' in the title it will assume the filename has ended at that point + # which causes false positives (ie. wolverine & the x-men becomes the x-men, which matches on x-men. + # 'the' is removed for comparisons later on + if '&' in cleantitle: cleantitle = re.sub('[/&]','and', cleantitle) nzbname = cleantitle @@ -500,6 +506,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is # make sure that things like - in watchcomic are accounted for when comparing to nzb. watchcomic_split = helpers.cleanName(str(findcomic[findloop])) + if '&' in watchcomic_split: watchcomic_split = re.sub('[/&]','and', watchcomic_split) watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None) logger.fdebug(str(splitit) + " nzb series word count: " + str(splitst)) diff --git a/mylar/webserve.py b/mylar/webserve.py index feb6f991..1b6c9830 100755 --- a/mylar/webserve.py +++ b/mylar/webserve.py @@ -791,7 +791,13 @@ class WebInterface(object): for result in results: if result is None: break - elif result['WatchMatch'].startswith('C'): + + if result['WatchMatch']: + watchmatched = result['WatchMatch'] + else: + watchmatched = '' + + if watchmatched.startswith('C'): print ("Confirmed. ComicID already provided - initiating auto-magik mode for import.") comicid = result['WatchMatch'][1:] print (result['WatchMatch'] + " .to. " + str(comicid))