From b08998ba8227efc6e2ab2ddbaf8a1d75f8bb1257 Mon Sep 17 00:00:00 2001 From: evilhero Date: Wed, 8 May 2013 22:22:47 -0400 Subject: [PATCH] FIX:(#378) Improved filechecker to pick up different variations in Volume inclusions, as well as special chars, IMP: Pullist improvements with regards to identical titles but not recent (would get confused), IMP: Added some extra checks when determining if a series is Continuing vs Ended --- data/interfaces/default/index.html | 13 +++++++- mylar/filechecker.py | 49 +++++++++++++++++++++++++----- mylar/findcomicfeed.py | 2 ++ mylar/weeklypull.py | 47 ++++++++++++++++++---------- 4 files changed, 87 insertions(+), 24 deletions(-) diff --git a/data/interfaces/default/index.html b/data/interfaces/default/index.html index 22bfc9b6..e23f43ac 100644 --- a/data/interfaces/default/index.html +++ b/data/interfaces/default/index.html @@ -1,6 +1,7 @@ <%inherit file="base.html"/> <%! from mylar import helpers, db + import datetime %> <%def name="body()"> @@ -74,7 +75,17 @@ %if comic['ComicPublished'] is None or comic['ComicPublished'] == '': Unknown %elif 'present' in comic['ComicPublished'].lower() or ( helpers.today()[:4] in comic['LatestDate']): - Continuing + <% + latestdate = comic['LatestDate'] + c_date = datetime.date(int(latestdate[:4]),int(latestdate[5:7]),1) + n_date = datetime.date.today() + recentchk = (n_date - c_date).days + if recentchk < 45: + recentstatus = 'Continuing' + else: + recentstatus = 'Ended' + %> + ${recentstatus} %else: Ended %endif diff --git a/mylar/filechecker.py b/mylar/filechecker.py index 0c94bed4..e2a2f308 100755 --- a/mylar/filechecker.py +++ b/mylar/filechecker.py @@ -40,6 +40,22 @@ def listFiles(dir,watchcomic,AlternateSearch=None): watchmatch = {} comiclist = [] comiccnt = 0 + not_these = ['\#', + '\,', + '\/', + '\:', + '\;', + '.', + '\-', + '\!', + '\$', + '\%', + '\+', + '\'', + '\?', + '\@'] + + for item in os.listdir(basedir): #print item #subname = os.path.join(basedir, item) @@ -51,19 +67,37 @@ def listFiles(dir,watchcomic,AlternateSearch=None): #print ("subit:" + str(subit)) if 'v' in str(subit).lower(): #print ("possible versioning detected.") + vfull = 0 if subit[1:].isdigit(): #if in format v1, v2009 etc... + if len(subit) > 3: + # if it's greater than 3 in length, then the format is Vyyyy + vfull = 1 # add on 1 character length to account for extra space #print (subit + " - assuming versioning. Removing from initial search pattern.") subname = re.sub(str(subit), '', subname) volrem = subit + #print ("removed " + str(volrem) + " from filename wording") if subit.lower()[:3] == 'vol': #if in format vol.2013 etc #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely #print ("volume detected as version #:" + str(subit)) subname = re.sub(subit, '', subname) volrem = subit - - subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', str(subname)) + + subname = re.sub('\_', ' ', subname) + nonocount = 0 + for nono in not_these: + if nono in subname: + subcnt = subname.count(nono) + #logger.fdebug(str(nono) + " detected " + str(subcnt) + " times.") + # segment '.' having a . by itself will denote the entire string which we don't want + if nono == '.': + subname = re.sub('\.', ' ', subname) + nonocount = nonocount + subcnt - 1 #(remove the extension from the length) + else: + subname = re.sub(str(nono), ' ', subname) + nonocount = nonocount + subcnt + #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname) modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_watchcomic) detectand = False modwatchcomic = re.sub('\&', ' and ', modwatchcomic) @@ -83,7 +117,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None): altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf" #if '_' in subname: # subname = subname.replace('_', ' ') - logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname)) + #logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname)) if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower(): if 'annual' in subname.lower(): #print ("it's an annual - unsure how to proceed") @@ -94,19 +128,20 @@ def listFiles(dir,watchcomic,AlternateSearch=None): #print ("Comicsize:" + str(comicsize)) comiccnt+=1 if modwatchcomic.lower() in subname.lower(): + #print ("we should remove " + str(nonocount) + " characters") #remove versioning here if volrem != None: - jtd_len = len(modwatchcomic) + len(volrem) + 1 #1 is to account for space btwn comic and vol # + jtd_len = len(modwatchcomic) + len(volrem) + nonocount + 1 #1 is to account for space btwn comic and vol # else: - jtd_len = len(modwatchcomic) + jtd_len = len(modwatchcomic) + nonocount if detectand: jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars elif altsearchcomic.lower() in subname.lower(): #remove versioning here if volrem != None: - jtd_len = len(altsearchcomic) + len(volrem) + 1 + jtd_len = len(altsearchcomic) + len(volrem) + nonocount + 1 else: - jtd_len = len(altsearchcomic) + jtd_len = len(altsearchcomic) + nonocount if detectand: jtd_len = jtd_len - 2 diff --git a/mylar/findcomicfeed.py b/mylar/findcomicfeed.py index f7cadec8..ebeff6af 100755 --- a/mylar/findcomicfeed.py +++ b/mylar/findcomicfeed.py @@ -16,6 +16,8 @@ def Startit(searchName, searchIssue, searchYear, ComicVersion): #searchYear = "2012" #clean up searchName due to webparse. searchName = searchName.replace("%20", " ") + if "," in searchName: + searchName = searchName.replace(",", "") logger.fdebug("name:" + str(searchName)) logger.fdebug("issue:" + str(searchIssue)) logger.fdebug("year:" + str(searchYear)) diff --git a/mylar/weeklypull.py b/mylar/weeklypull.py index fa173c39..753ad078 100755 --- a/mylar/weeklypull.py +++ b/mylar/weeklypull.py @@ -25,6 +25,7 @@ import urllib import os import time import re +import datetime import mylar from mylar import db, updater, helpers, logger @@ -378,28 +379,42 @@ def pullitcheck(comic1off_name=None,comic1off_id=None,forcecheck=None): w = 1 else: #let's read in the comic.watchlist from the db here - cur.execute("SELECT ComicID, ComicName, ComicYear, ComicPublisher, ComicPublished from comics") + cur.execute("SELECT ComicID, ComicName, ComicYear, ComicPublisher, ComicPublished, LatestDate from comics") while True: watchd = cur.fetchone() #print ("watchd: " + str(watchd)) if watchd is None: break if 'Present' in watchd[4] or (helpers.now()[:4] in watchd[4]): - # let's not even bother with comics that are in the Present. - a_list.append(watchd[1]) - b_list.append(watchd[2]) - comicid.append(watchd[0]) - pubdate.append(watchd[4]) - #print ( "Comic:" + str(a_list[w]) + " Year: " + str(b_list[w]) ) - #if "WOLVERINE AND THE X-MEN" in str(a_list[w]): a_list[w] = "WOLVERINE AND X-MEN" - lines.append(a_list[w].strip()) - unlines.append(a_list[w].strip()) - llen.append(a_list[w].splitlines()) - ccname.append(a_list[w].strip()) - tmpwords = a_list[w].split(None) - ltmpwords = len(tmpwords) - ltmp = 1 - w+=1 + # this gets buggered up when series are named the same, and one ends in the current + # year, and the new series starts in the same year - ie. Avengers + # lets' grab the latest issue date and see how far it is from current + # anything > 45 days we'll assume it's a false match ;) + #logger.fdebug("ComicName: " + watchd[1]) + latestdate = watchd[5] + #logger.fdebug("latestdate: " + str(latestdate)) + c_date = datetime.date(int(latestdate[:4]),int(latestdate[5:7]),1) + n_date = datetime.date.today() + #logger.fdebug("c_date : " + str(c_date) + " ... n_date : " + str(n_date)) + recentchk = (n_date - c_date).days + #logger.fdebug("recentchk: " + str(recentchk) + " days") + #logger.fdebug(" ----- ") + if recentchk < 45: + # let's not even bother with comics that are in the Present. + a_list.append(watchd[1]) + b_list.append(watchd[2]) + comicid.append(watchd[0]) + pubdate.append(watchd[4]) + #print ( "Comic:" + str(a_list[w]) + " Year: " + str(b_list[w]) ) + #if "WOLVERINE AND THE X-MEN" in str(a_list[w]): a_list[w] = "WOLVERINE AND X-MEN" + lines.append(a_list[w].strip()) + unlines.append(a_list[w].strip()) + llen.append(a_list[w].splitlines()) + ccname.append(a_list[w].strip()) + tmpwords = a_list[w].split(None) + ltmpwords = len(tmpwords) + ltmp = 1 + w+=1 cnt = int(w-1) cntback = int(w-1) kp = []