From b08998ba8227efc6e2ab2ddbaf8a1d75f8bb1257 Mon Sep 17 00:00:00 2001
From: evilhero <evilhero@gmail.com>
Date: Wed, 8 May 2013 22:22:47 -0400
Subject: [PATCH] FIX:(#378) Improved filechecker to pick up different
 variations in Volume inclusions, as well as special chars, IMP: Pullist
 improvements for series with identical titles where one is no longer recent
 (they would get confused), IMP: Added some extra checks when determining if
 a series is Continuing vs Ended

---
 data/interfaces/default/index.html | 13 +++++++-
 mylar/filechecker.py               | 49 +++++++++++++++++++++++++-----
 mylar/findcomicfeed.py             |  2 ++
 mylar/weeklypull.py                | 47 ++++++++++++++++++----------
 4 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/data/interfaces/default/index.html b/data/interfaces/default/index.html
index 22bfc9b6..e23f43ac 100644
--- a/data/interfaces/default/index.html
+++ b/data/interfaces/default/index.html
@@ -1,6 +1,7 @@
 <%inherit file="base.html"/>
 <%!
 	from mylar import helpers, db
+        import datetime
 %>
 
 <%def name="body()">
@@ -74,7 +75,17 @@
                                 %if comic['ComicPublished'] is None or comic['ComicPublished'] == '':
                                     Unknown 
                                 %elif 'present' in comic['ComicPublished'].lower() or ( helpers.today()[:4] in comic['LatestDate']):
-                                    Continuing
+                                    <%
+                                          latestdate = comic['LatestDate']
+                                          c_date = datetime.date(int(latestdate[:4]),int(latestdate[5:7]),1)
+                                          n_date = datetime.date.today()
+                                          recentchk = (n_date - c_date).days
+                                          if recentchk < 45:
+                                              recentstatus = 'Continuing'
+                                          else:
+                                              recentstatus = 'Ended'
+                                     %>
+                                    ${recentstatus}
                                 %else:
                                     Ended
                                 %endif
diff --git a/mylar/filechecker.py b/mylar/filechecker.py
index 0c94bed4..e2a2f308 100755
--- a/mylar/filechecker.py
+++ b/mylar/filechecker.py
@@ -40,6 +40,22 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
     watchmatch = {}
     comiclist = []
     comiccnt = 0
+    not_these = ['\#',
+               '\,',
+               '\/',
+               '\:',
+               '\;',
+               '.',
+               '\-',
+               '\!',
+               '\$',
+               '\%',
+               '\+',
+               '\'',
+               '\?',
+               '\@']
+
+
     for item in os.listdir(basedir):
         #print item
         #subname = os.path.join(basedir, item)
@@ -51,19 +67,37 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
             #print ("subit:" + str(subit))
             if 'v' in str(subit).lower():
                 #print ("possible versioning detected.")
+                vfull = 0
                 if subit[1:].isdigit():
                     #if in format v1, v2009 etc...
+                    if len(subit) > 3:
+                        # if it's greater than 3 in length, then the format is Vyyyy
+                        vfull = 1 # add on 1 character length to account for extra space
                     #print (subit + "  - assuming versioning. Removing from initial search pattern.")
                     subname = re.sub(str(subit), '', subname)
                     volrem = subit
+                    #print ("removed " + str(volrem) + " from filename wording")
                 if subit.lower()[:3] == 'vol':
                     #if in format vol.2013 etc
                     #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
                     #print ("volume detected as version #:" + str(subit))
                     subname = re.sub(subit, '', subname)
                     volrem = subit
-        
-        subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', str(subname))
+
+        subname = re.sub('\_', ' ', subname)
+        nonocount = 0
+        for nono in not_these:
+            if nono in subname:
+                subcnt = subname.count(nono)
+                #logger.fdebug(str(nono) + " detected " + str(subcnt) + " times.")
+                # handle '.' separately: used as a regex, a bare . matches any character (i.e. the entire string), which we don't want
+                if nono == '.':
+                    subname = re.sub('\.', ' ', subname)
+                    nonocount = nonocount + subcnt - 1 #(remove the extension from the length)
+                else:
+                    subname = re.sub(str(nono), ' ', subname)
+                    nonocount = nonocount + subcnt
+        #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
         modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_watchcomic)
         detectand = False
         modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
@@ -83,7 +117,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
             altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
         #if '_' in subname:
         #    subname = subname.replace('_', ' ')
-        logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
+        #logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
         if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower():
             if 'annual' in subname.lower():
                 #print ("it's an annual - unsure how to proceed")
@@ -94,19 +128,20 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
             #print ("Comicsize:" + str(comicsize))
             comiccnt+=1
             if modwatchcomic.lower() in subname.lower():
+                #print ("we should remove " + str(nonocount) + " characters")                
                 #remove versioning here
                 if volrem != None:
-                    jtd_len = len(modwatchcomic) + len(volrem) + 1 #1 is to account for space btwn comic and vol #
+                    jtd_len = len(modwatchcomic) + len(volrem) + nonocount + 1 #1 is to account for space btwn comic and vol #
                 else:
-                    jtd_len = len(modwatchcomic)
+                    jtd_len = len(modwatchcomic) + nonocount
                 if detectand:
                     jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
             elif altsearchcomic.lower() in subname.lower():
                 #remove versioning here
                 if volrem != None:
-                    jtd_len = len(altsearchcomic) + len(volrem) + 1
+                    jtd_len = len(altsearchcomic) + len(volrem) + nonocount + 1
                 else:
-                    jtd_len = len(altsearchcomic)
+                    jtd_len = len(altsearchcomic) + nonocount
                 if detectand: 
                     jtd_len = jtd_len - 2
 
diff --git a/mylar/findcomicfeed.py b/mylar/findcomicfeed.py
index f7cadec8..ebeff6af 100755
--- a/mylar/findcomicfeed.py
+++ b/mylar/findcomicfeed.py
@@ -16,6 +16,8 @@ def Startit(searchName, searchIssue, searchYear, ComicVersion):
     #searchYear = "2012"
     #clean up searchName due to webparse.
     searchName = searchName.replace("%20", " ")
+    if "," in searchName:
+        searchName = searchName.replace(",", "")
     logger.fdebug("name:" + str(searchName))
     logger.fdebug("issue:" + str(searchIssue))
     logger.fdebug("year:" + str(searchYear))
diff --git a/mylar/weeklypull.py b/mylar/weeklypull.py
index fa173c39..753ad078 100755
--- a/mylar/weeklypull.py
+++ b/mylar/weeklypull.py
@@ -25,6 +25,7 @@ import urllib
 import os 
 import time 
 import re
+import datetime
 
 import mylar 
 from mylar import db, updater, helpers, logger
@@ -378,28 +379,42 @@ def pullitcheck(comic1off_name=None,comic1off_id=None,forcecheck=None):
             w = 1            
         else:
             #let's read in the comic.watchlist from the db here
-            cur.execute("SELECT ComicID, ComicName, ComicYear, ComicPublisher, ComicPublished from comics")
+            cur.execute("SELECT ComicID, ComicName, ComicYear, ComicPublisher, ComicPublished, LatestDate from comics")
             while True:
                 watchd = cur.fetchone()
                 #print ("watchd: " + str(watchd))
                 if watchd is None:
                     break
                 if 'Present' in watchd[4] or (helpers.now()[:4] in watchd[4]):
-                 # let's not even bother with comics that are in the Present.
-                    a_list.append(watchd[1])
-                    b_list.append(watchd[2])
-                    comicid.append(watchd[0])
-                    pubdate.append(watchd[4])
-                    #print ( "Comic:" + str(a_list[w]) + " Year: " + str(b_list[w]) )
-                    #if "WOLVERINE AND THE X-MEN" in str(a_list[w]): a_list[w] = "WOLVERINE AND X-MEN"
-                    lines.append(a_list[w].strip())
-                    unlines.append(a_list[w].strip())
-                    llen.append(a_list[w].splitlines())
-                    ccname.append(a_list[w].strip())
-                    tmpwords = a_list[w].split(None)
-                    ltmpwords = len(tmpwords)
-                    ltmp = 1
-                    w+=1
+                 # this gets buggered up when two series are named the same, and one ends in the
+                 # current year while the new series starts in that same year - e.g. Avengers.
+                 # let's grab the latest issue date and see how far it is from the current date;
+                 # anything 45 days or more away we'll assume is a false match ;)
+                    #logger.fdebug("ComicName: " + watchd[1])
+                    latestdate = watchd[5]
+                    #logger.fdebug("latestdate:  " + str(latestdate))
+                    c_date = datetime.date(int(latestdate[:4]),int(latestdate[5:7]),1)
+                    n_date = datetime.date.today()
+                    #logger.fdebug("c_date : " + str(c_date) + " ... n_date : " + str(n_date))
+                    recentchk = (n_date - c_date).days
+                    #logger.fdebug("recentchk: " + str(recentchk) + " days")
+                    #logger.fdebug(" ----- ")
+                    if recentchk < 45:
+                        # let's not even bother with comics that are in the Present.
+                        a_list.append(watchd[1])
+                        b_list.append(watchd[2])
+                        comicid.append(watchd[0])
+                        pubdate.append(watchd[4])
+                        #print ( "Comic:" + str(a_list[w]) + " Year: " + str(b_list[w]) )
+                        #if "WOLVERINE AND THE X-MEN" in str(a_list[w]): a_list[w] = "WOLVERINE AND X-MEN"
+                        lines.append(a_list[w].strip())
+                        unlines.append(a_list[w].strip())
+                        llen.append(a_list[w].splitlines())
+                        ccname.append(a_list[w].strip())
+                        tmpwords = a_list[w].split(None)
+                        ltmpwords = len(tmpwords)
+                        ltmp = 1
+                        w+=1
         cnt = int(w-1)
         cntback = int(w-1)
         kp = []