From acc60d9ff7ba21f0767bf7ed3724e61e843c736a Mon Sep 17 00:00:00 2001
From: evilhero
Date: Sun, 7 Apr 2013 14:06:36 -0400
Subject: [PATCH] FIX:(#308) Issues adding large series (100+ issues),
 FIX:(#294) AU problems with snatches/file checking, FIX:(#310) double issues
 occurring -- Now if CV_ONLY is enabled along with CV_ONETIMER in config.ini,
 doing a Refresh on a series will correct data issues while retaining Issue
 Status

---
 mylar/PostProcessor.py |  9 ++---
 mylar/cv.py            | 78 ++++++++++++++++++++++++------------------
 mylar/updater.py       | 52 +++++++++++++++-------------
 mylar/webserve.py      | 60 +++++++++++++++++++++++---------
 4 files changed, 123 insertions(+), 76 deletions(-)

diff --git a/mylar/PostProcessor.py b/mylar/PostProcessor.py
index 7a8ee315..76920495 100755
--- a/mylar/PostProcessor.py
+++ b/mylar/PostProcessor.py
@@ -207,9 +207,10 @@ class PostProcessor(object):
         #use issueid to get publisher, series, year, issue number
         issuenzb = myDB.action("SELECT * from issues WHERE issueid=?", [issueid]).fetchone()
         comicid = issuenzb['ComicID']
-        issuenum = issuenzb['Issue_Number']
+        issuenumOG = issuenzb['Issue_Number']
         #issueno = str(issuenum).split('.')[0]
         #new CV API - removed all decimals...here we go AGAIN!
+        issuenum = issuenumOG
         issue_except = 'None'
         if 'au' in issuenum.lower():
             issuenum = re.sub("[^0-9]", "", issuenum)
@@ -416,18 +417,18 @@ class PostProcessor(object):
             myDB.action('DELETE from nzblog WHERE issueid=?', [issueid])
             #force rescan of files
             updater.forceRescan(comicid)
-        logger.info(u"Post-Processing completed for: " + series + " issue: " + str(issuenum) )
+        logger.info(u"Post-Processing completed for: " + series + " issue: " + str(issuenumOG) )
         self._log(u"Post Processing SUCCESSFULL! ", logger.DEBUG)
         if mylar.PROWL_ENABLED:
-            pushmessage = series + '(' + issueyear + ') - issue #' + issuenum
+            pushmessage = series + '(' + issueyear + ') - issue #' + issuenumOG
             logger.info(u"Prowl request")
             prowl = notifiers.PROWL()
             prowl.notify(pushmessage,"Download and Postprocessing completed")
         if mylar.NMA_ENABLED:
             nma = notifiers.NMA()
-            nma.notify(series, str(issueyear), str(issuenum))
+            nma.notify(series, str(issueyear), str(issuenumOG))
 
 # retrieve/create the corresponding comic objects
 
diff --git a/mylar/cv.py b/mylar/cv.py
index 1099f755..13bb5ff2 100755
--- a/mylar/cv.py
+++ b/mylar/cv.py
@@ -23,47 +23,28 @@ import lib.feedparser
 import mylar
 from bs4 import BeautifulSoup as Soup
 
-def getComic(comicid,type,issueid=None):
+def pulldetails(comicid,type,issueid=None,offset=1):
+    import urllib2
+
+    #import easy to use xml parser called minidom:
+    from xml.dom.minidom import parseString
+
     comicapi='583939a3df0a25fc4e8b7a29934a13078002dc27'
-    #api
     if type == 'comic':
         PULLURL='http://api.comicvine.com/volume/' + str(comicid) + '/?api_key=' + str(comicapi) + '&format=xml&field_list=name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,first_issue'
     elif type == 'issue':
         if mylar.CV_ONLY:
             cv_type = 'issues'
-            searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date'
+            searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date&offset='
         else:
             cv_type = 'volume/' + str(comicid)
             searchset = 'name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description'
-        PULLURL = 'http://api.comicvine.com/' + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset)
+        PULLURL = 'http://api.comicvine.com/' + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset) + '&offset=' + str(offset)
     elif type == 'firstissue':
         #this is used ONLY for CV_ONLY
         PULLURL = 'http://api.comicvine.com/issues/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(issueid) + '&field_list=cover_date'
-
-    #import library to do http requests:
-    import urllib2
-
-    #import easy to use xml parser called minidom:
-    from xml.dom.minidom import parseString
-    #all these imports are standard on most modern python implementations
-    #download the file:
-    #first we should check to see if file is in cache to save hits to api.
-    #parsing error - will investigate later...
-    cache_path='cache/'
-    #if os.path.isfile( str(cache_path) + str(comicid) + '.xml' ) == 'True':
-    #    pass
-    #else:
-    #    f = urllib2.urlopen(PULLURL)
-    #    # write api retrieval to tmp file for caching
-    #    local_file = open(str(cache_path) + str(comicid) + '.xml', 'wb')
-    #    local_file.write(f.read())
-    #    local_file.close
-    #    f.close
-
-    #file = open(str(cache_path) + str(comicid) + '.xml', 'rb')
     file = urllib2.urlopen(PULLURL)
 
     #convert to string:
     data = file.read()
@@ -72,9 +53,40 @@ def getComic(comicid,type,issueid=None):
     #parse the xml you downloaded
     dom = parseString(data)
 
-    if type == 'comic': return GetComicInfo(comicid,dom)
-    if type == 'issue': return GetIssuesInfo(comicid,dom)
-    if type == 'firstissue': return GetFirstIssue(issueid,dom)
+    return dom
+
+
+def getComic(comicid,type,issueid=None):
+    if type == 'issue':
+        offset = 1
+        issue = {}
+        comicResults = []
+        #let's find out how many results we get from the query...
+        searched = pulldetails(comicid,'issue',None,1)
+        if searched is None: return False
+        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
+        logger.fdebug("there are " + str(totalResults) + " search results...")
+        if not totalResults:
+            return False
+        countResults = 0
+        while (countResults < int(totalResults)):
+            logger.fdebug("querying " + str(countResults))
+            if countResults > 0:
+                #new api - have to change to page # instead of offset count
+                offsetcount = countResults
+                searched = pulldetails(comicid,'issue',None,offsetcount)
+            comicResults = GetIssuesInfo(comicid,searched,issue)
+            #search results are limited to 100 and by pagination now...let's account for this.
+            countResults = countResults + 100
+
+        return issue
+
+    elif type == 'comic':
+        dom = pulldetails(comicid,'comic',None,1)
+        return GetComicInfo(comicid,dom)
+    elif type == 'firstissue':
+        dom = pulldetails(comicid,'firstissue',issueid,1)
+        return GetFirstIssue(issueid,dom)
 
 def GetComicInfo(comicid,dom):
@@ -162,7 +174,7 @@ def GetComicInfo(comicid,dom):
 #    comic['comicchoice'] = comicchoice
     return comic
 
-def GetIssuesInfo(comicid,dom):
+def GetIssuesInfo(comicid,dom,issue):
     subtracks = dom.getElementsByTagName('issue')
     if not mylar.CV_ONLY:
         cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
@@ -176,7 +188,7 @@ def GetIssuesInfo(comicid,dom):
         n = cntiss-1
     else:
         n = int(len(subtracks))-1
-    issue = {}
+#    issue = {}
     issuechoice = []
     firstdate = '2099-00-00'
     for subtrack in subtracks:
@@ -214,7 +226,7 @@ def GetIssuesInfo(comicid,dom):
             if issue['CoverDate'] < firstdate and issue['CoverDate'] != '0000-00-00':
                 firstdate = issue['CoverDate']
         n-=1
-
+    print issuechoice
     issue['issuechoice'] = issuechoice
     issue['firstdate'] = firstdate
     return issue
diff --git a/mylar/updater.py b/mylar/updater.py
index 3723819c..e9d79754 100755
--- a/mylar/updater.py
+++ b/mylar/updater.py
@@ -272,6 +272,7 @@ def forceRescan(ComicID,archive=None):
     fcnew = []
     fn = 0
     issuedupechk = []
+    issueexceptdupechk = []
     reissues = myDB.action('SELECT * FROM issues WHERE ComicID=?', [ComicID]).fetchall()
     # if filechecker returns 0 files (it doesn't find any), but some issues have a status of 'Archived'
     # the loop below won't work...let's adjust :)
@@ -347,7 +348,6 @@ def forceRescan(ComicID,archive=None):
             int_iss, iss_except = helpers.decimal_issue(reiss['Issue_Number'])
             issyear = reiss['IssueDate'][:4]
             old_status = reiss['Status']
-            #logger.fdebug("integer_issue:" + str(int_iss) + " ... status: " + str(old_status))
 
             #if comic in format of "SomeSeries 5(c2c)(2013).cbr" whatever...it'll die.
@@ -389,6 +389,7 @@ def forceRescan(ComicID,archive=None):
                 #print ("AU detected")
                 #if the 'AU' is in 005AU vs 005 AU it will yield different results.
                 fnd_iss_except = 'AU'
+                #logger.info("AU Detected - fnd_iss_except set.")
             else:
                 #fcdigit = "0"
                 fcdigit = 0
@@ -434,6 +435,7 @@ def forceRescan(ComicID,archive=None):
                     if fcnew[som][:austart].isdigit():
                         fcdigit = int(fcnew[som][:austart]) * 1000
                         fnd_iss_except = 'AU'
+                        #logger.info("iss_except set to AU")
                 #if AU is part of issue (5AU instead of 5 AU)
                 else:
                     # it's a word, skip it.
@@ -442,39 +444,43 @@ def forceRescan(ComicID,archive=None):
                 #logger.fdebug("int_iss: " + str(int_iss))
                 if "." in str(int_iss):
                     int_iss = helpers.decimal_issue(int_iss)
-                #print("this is the int issue:" + str(int_iss))
-                #print("this is the fcdigit:" + str(fcdigit))
+                #logger.fdebug("this is the int issue:" + str(int_iss))
+                #logger.fdebug("this is the fcdigit:" + str(fcdigit))
                 if int(fcdigit) == int_iss:
-                    #print ("fnd_iss_except: " + str(fnd_iss_except))
-                    #print ("iss_except: " + str(iss_except))
-                    if fnd_iss_except != 'None' and iss_except == 'AU':
+                    #logger.fdebug("fnd_iss_except: " + str(fnd_iss_except))
+                    #logger.fdebug("iss_except: " + str(iss_except))
+                    if str(fnd_iss_except) != 'None' and str(iss_except) == 'AU':
                         if fnd_iss_except.lower() == iss_except.lower():
                             logger.fdebug("matched for AU")
                         else:
-                            #logger.fdebug("this is not an AU match..ignoring result.")
+                            logger.fdebug("this is not an AU match..ignoring result.")
                             break
-                    elif fnd_iss_except == 'None' and iss_except == 'AU':break
-                    elif fnd_iss_except == 'AU' and iss_except == 'None':break
+                    elif str(fnd_iss_except) == 'None' and str(iss_except) == 'AU':break
+                    elif str(fnd_iss_except) == 'AU' and str(iss_except) == 'None':break
                     #if issyear in fcnew[som+1]:
                     #    print "matched on year:" + str(issyear)
                     #issuedupechk here.
                     #print ("fcdigit:" + str(fcdigit))
                     #print ("findiss_except:" + str(fnd_iss_except) + " = iss_except:" + str(iss_except))
-                    if int(fcdigit) in issuedupechk and str(fnd_iss_except) == str(iss_except):
-                        logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
-                        issuedupe = "yes"
-                        break
-                    #logger.fdebug("matched...issue: " + rescan['ComicName'] + "#" + str(reiss['Issue_Number']) + " --- " + str(int_iss))
-                    havefiles+=1
-                    haveissue = "yes"
-                    isslocation = str(tmpfc['ComicFilename'])
-                    issSize = str(tmpfc['ComicSize'])
-                    logger.fdebug(".......filename: " + str(isslocation))
-                    logger.fdebug(".......filesize: " + str(tmpfc['ComicSize']))
-                    # to avoid duplicate issues which screws up the count...let's store the filename issues then
-                    # compare earlier...
-                    issuedupechk.append(int(fcdigit))
+                    #if int(fcdigit) in issuedupechk and str(fnd_iss_except) not in issueexceptdupechk: #str(fnd_iss_except) == str(iss_except):
+                    for d in issuedupechk:
+                        if int(d['fcdigit']) == int(fcdigit) and d['fnd_iss_except'] == str(fnd_iss_except):
+                            logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
+                            issuedupe = "yes"
+                            break
+                    if issuedupe == "no":
+                        logger.fdebug("matched...issue: " + rescan['ComicName'] + "#" + str(reiss['Issue_Number']) + " --- " + str(int_iss))
+                        havefiles+=1
+                        haveissue = "yes"
+                        isslocation = str(tmpfc['ComicFilename'])
+                        issSize = str(tmpfc['ComicSize'])
+                        logger.fdebug(".......filename: " + str(isslocation))
+                        logger.fdebug(".......filesize: " + str(tmpfc['ComicSize']))
+                        # to avoid duplicate issues which screws up the count...let's store the filename issues then
+                        # compare earlier...
+                        issuedupechk.append({'fcdigit': int(fcdigit),
+                                             'fnd_iss_except': fnd_iss_except})
                     break
                 #else:
                 # if the issue # matches, but there is no year present - still match.
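
Note on the two fixes above, with condensed sketches in plain Python. These are
illustrative rewrites, not Mylar's actual code; helpers such as fetch_page,
parse_page and count_matches are hypothetical stand-ins.

For #308: ComicVine caps issue queries at 100 results per pull, so getComic()
in cv.py now walks the result set page by page, re-querying pulldetails() at
increasing offsets until number_of_total_results is exhausted. The loop amounts
to roughly:

    def fetch_all_issues(comic_id, fetch_page, parse_page, page_size=100):
        """fetch_page(comic_id, offset) -> minidom document for one page;
        parse_page(dom, issue) merges that page's rows into the shared dict."""
        issue = {}
        dom = fetch_page(comic_id, 0)    # first pull also reports the total
        total = int(dom.getElementsByTagName(
            'number_of_total_results')[0].firstChild.wholeText)
        parse_page(dom, issue)
        offset = page_size
        while offset < total:            # series with 100+ issues need extra pulls
            parse_page(fetch_page(comic_id, offset), issue)
            offset += page_size
        return issue

For #294/#310: forceRescan() previously deduped scanned files on the bare issue
digits alone, so "100" and "100 AU" collided. issuedupechk now stores the digits
together with the AU exception tag, and a file is only skipped when both match:

    def count_matches(scanned_files):
        """scanned_files: list of (fcdigit, fnd_iss_except) per parsed file."""
        issuedupechk = []   # was a flat list of ints: '100' and '100 AU' collided
        havefiles = 0
        for fcdigit, fnd_iss_except in scanned_files:
            if any(d['fcdigit'] == fcdigit and d['fnd_iss_except'] == fnd_iss_except
                   for d in issuedupechk):
                continue    # same digits AND same AU tag: a true duplicate
            havefiles += 1
            issuedupechk.append({'fcdigit': fcdigit,
                                 'fnd_iss_except': fnd_iss_except})
        return havefiles

    # issue 100 and 100AU count separately; a second copy of 100 does not
    # (fcdigit is the parsed number * 1000, as in forceRescan above):
    assert count_matches([(100000, 'None'), (100000, 'AU'), (100000, 'None')]) == 2
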
diff --git a/mylar/webserve.py b/mylar/webserve.py
index 3e1eb365..df8481ab 100755
--- a/mylar/webserve.py
+++ b/mylar/webserve.py
@@ -374,13 +374,37 @@ class WebInterface(object):
     def refreshArtist(self, ComicID):
         myDB = db.DBConnection()
         mismatch = "no"
-        CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [ComicID]).fetchone()
-        if CV_EXcomicid is None: pass
+        if not mylar.CV_ONLY or ComicID[:1] == "G":
+
+            CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [ComicID]).fetchone()
+            if CV_EXcomicid is None: pass
+            else:
+                if CV_EXcomicid['variloop'] == '99':
+                    mismatch = "yes"
+            if ComicID[:1] == "G": threading.Thread(target=importer.GCDimport, args=[ComicID]).start()
+            else: threading.Thread(target=importer.addComictoDB, args=[ComicID,mismatch]).start()
         else:
-            if CV_EXcomicid['variloop'] == '99':
-                mismatch = "yes"
-        if ComicID[:1] == "G": threading.Thread(target=importer.GCDimport, args=[ComicID]).start()
-        else: threading.Thread(target=importer.addComictoDB, args=[ComicID,mismatch]).start()
+            if mylar.CV_ONETIMER == 1:
+                #in order to update to JUST CV_ONLY, we need to delete the issues for a given series so it's a clean slate
+                issues = myDB.select('SELECT * FROM issues WHERE ComicID=?', [ComicID])
+                #store the issues' status for a given comicid; after deleting and readding, flip the status back to what it was
+                myDB.select('DELETE FROM issues WHERE ComicID=?', [ComicID])
+                mylar.importer.addComictoDB(ComicID,mismatch)
+                issues_new = myDB.select('SELECT * FROM issues WHERE ComicID=?', [ComicID])
+                icount = 0
+                for issue in issues:
+                    for issuenew in issues_new:
+                        if issuenew['IssueID'] == issue['IssueID'] and issuenew['Status'] != issue['Status']:
+                            #change the status to the previous status
+                            ctrlVAL = {'IssueID': issue['IssueID']}
+                            newVAL = {'Status': issue['Status']}
+                            myDB.upsert("Issues", newVAL, ctrlVAL)
+                            icount+=1
+                            break
+                logger.info("changed the status of " + str(icount) + " issues.")
+            else:
+                mylar.importer.addComictoDB(ComicID,mismatch)
+
         raise cherrypy.HTTPRedirect("artistPage?ComicID=%s" % ComicID)
     refreshArtist.exposed=True
@@ -845,33 +869,37 @@ class WebInterface(object):
         arc_match = []
 
         for arc in ArcWatch:
+            print ("arc: " + str(arc['ComicName']))
             #cycle through the story arcs here for matches on the watchlist
             mod_arc = re.sub('[\:/,\'\/\-\&\%\$\#\@\!\*\+\.]', '', arc['ComicName'])
             mod_arc = re.sub(r'\s', '', mod_arc)
             for comic in Comics:
+                print ("comic: " + comic['ComicName'])
                 mod_watch = re.sub('[\:\,\'\/\-\&\%\$\#\@\!\*\+\.]', '', comic['ComicName'])
                 mod_watch = re.sub(r'\s', '', mod_watch)
-                if mod_watch == mod_arc:
+                if mod_watch == mod_arc and arc['SeriesYear'] == comic['SeriesYear']:
                     #gather the matches now.
                     arc_match.append({
                            "match_name": arc['ComicName'],
                            "match_id": comic['ComicID'],
                            "match_issue": arc['IssueNumber'],
                            "match_issuearcid": arc['IssueArcID']})
-
-        print ("we matched on " + str(len(arc_match)) + " issues")
+        logger.fdebug("arc_match: " + str(arc_match))
+        logger.fdebug("we matched on " + str(len(arc_match)) + " issues")
 
         for m_arc in arc_match:
             print m_arc
             #now we cycle through the issues looking for a match.
-            issue = myDB.select("SELECT * FROM issues where ComicID=?", [m_arc['match_id']])
-            for issuechk in issue:
-                print ("issuechk: " + str(issuechk['Issue_Number']) + "..." + str(m_arc['match_issue']))
-                if helpers.decimal_issue(issuechk['Issue_Number']) == helpers.decimal_issue(m_arc['match_issue']):
-                    logger.info("we matched on " + str(issuechk['Issue_Number']) + " for " + str(m_arc['match_name']))
-                    if issuechk['Status'] == 'Downloaded' or issuechk['Status'] == 'Archived':
+            issue = myDB.action("SELECT * FROM issues where ComicID=? and Issue_Number=?", [m_arc['match_id'],m_arc['match_issue']])
+            if issue is None: pass
+            else:
+                logger.fdebug("issue: " + str(issue['Issue_Number']) + "..." + str(m_arc['match_issue']))
+#                if helpers.decimal_issue(issuechk['Issue_Number']) == helpers.decimal_issue(m_arc['match_issue']):
+                if issue['Issue_Number'] == m_arc['match_issue']:
+                    logger.fdebug("we matched on " + str(issue['Issue_Number']) + " for " + str(m_arc['match_name']))
+                    if issue['Status'] == 'Downloaded' or issue['Status'] == 'Archived':
                         ctrlVal = {"IssueArcID": match_issuearcid }
-                        newVal = {"Status": issuechk['Status']}
+                        newVal = {"Status": issue['Status']}
                         myDB.upsert("readinglist",newVal,ctrlVal)
                         logger.info("Already have " + match_issuearcid)
                         break
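
Note on the CV_ONETIMER path in refreshArtist() above: it is a snapshot/restore
pattern -- record every issue's Status, wipe and re-add the series so the
CV_ONLY data is rebuilt cleanly, then re-apply any Status the rebuild clobbered.
A condensed sketch (db and importer stand in for Mylar's myDB and
mylar.importer; the signatures here are illustrative, not the real API):

    def refresh_series(db, importer, comic_id):
        # snapshot: IssueID -> Status before the wipe
        old = dict((row['IssueID'], row['Status']) for row in
                   db.select('SELECT * FROM issues WHERE ComicID=?', [comic_id]))
        db.action('DELETE FROM issues WHERE ComicID=?', [comic_id])
        importer.addComictoDB(comic_id, "no")   # repopulate from ComicVine
        # restore: flip back anything the rebuild changed
        changed = 0
        for row in db.select('SELECT * FROM issues WHERE ComicID=?', [comic_id]):
            prev = old.get(row['IssueID'])
            if prev is not None and prev != row['Status']:
                db.upsert('Issues', {'Status': prev}, {'IssueID': row['IssueID']})
                changed += 1
        return changed

Matching on IssueID rather than on issue number is what lets the restore
survive the renumbering and dedupe fixes above: the ComicVine issue id stays
stable even when the parsed issue number changes.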