From acc60d9ff7ba21f0767bf7ed3724e61e843c736a Mon Sep 17 00:00:00 2001
From: evilhero
Date: Sun, 7 Apr 2013 14:06:36 -0400
Subject: [PATCH] FIX:(#308) Issues adding large series (100+ issues),
 FIX:(#294) AU problems with snatches/file checking, FIX:(#310) double issues
 occurring -- Now if CV_ONLY is enabled along with CV_ONETIMER in config.ini,
 doing a Refresh on a series will correct data issues while retaining Issue
 Status

---
 mylar/PostProcessor.py |  9 ++---
 mylar/cv.py            | 78 ++++++++++++++++++++++++------------------
 mylar/updater.py       | 52 +++++++++++++++-------------
 mylar/webserve.py      | 60 +++++++++++++++++++++++---------
 4 files changed, 123 insertions(+), 76 deletions(-)

diff --git a/mylar/PostProcessor.py b/mylar/PostProcessor.py
index 7a8ee315..76920495 100755
--- a/mylar/PostProcessor.py
+++ b/mylar/PostProcessor.py
@@ -207,9 +207,10 @@ class PostProcessor(object):
         #use issueid to get publisher, series, year, issue number
         issuenzb = myDB.action("SELECT * from issues WHERE issueid=?", [issueid]).fetchone()
         comicid = issuenzb['ComicID']
-        issuenum = issuenzb['Issue_Number']
+        issuenumOG = issuenzb['Issue_Number']
         #issueno = str(issuenum).split('.')[0]
         #new CV API - removed all decimals...here we go AGAIN!
+        issuenum = issuenumOG
         issue_except = 'None'
         if 'au' in issuenum.lower():
             issuenum = re.sub("[^0-9]", "", issuenum)
@@ -416,18 +417,18 @@ class PostProcessor(object):
             myDB.action('DELETE from nzblog WHERE issueid=?', [issueid])
             #force rescan of files
             updater.forceRescan(comicid)
-        logger.info(u"Post-Processing completed for: " + series + " issue: " + str(issuenum) )
+        logger.info(u"Post-Processing completed for: " + series + " issue: " + str(issuenumOG) )
         self._log(u"Post Processing SUCCESSFULL! ", logger.DEBUG)
         if mylar.PROWL_ENABLED:
-            pushmessage = series + '(' + issueyear + ') - issue #' + issuenum
+            pushmessage = series + '(' + issueyear + ') - issue #' + issuenumOG
             logger.info(u"Prowl request")
             prowl = notifiers.PROWL()
             prowl.notify(pushmessage,"Download and Postprocessing completed")
         if mylar.NMA_ENABLED:
             nma = notifiers.NMA()
-            nma.notify(series, str(issueyear), str(issuenum))
+            nma.notify(series, str(issueyear), str(issuenumOG))
 
 # retrieve/create the corresponding comic objects
 
diff --git a/mylar/cv.py b/mylar/cv.py
index 1099f755..13bb5ff2 100755
--- a/mylar/cv.py
+++ b/mylar/cv.py
@@ -23,47 +23,28 @@ import lib.feedparser
 import mylar
 from bs4 import BeautifulSoup as Soup
 
-def getComic(comicid,type,issueid=None):
+def pulldetails(comicid,type,issueid=None,offset=1):
+    import urllib2
+
+    #import easy to use xml parser called minidom:
+    from xml.dom.minidom import parseString
+
     comicapi='583939a3df0a25fc4e8b7a29934a13078002dc27'
-    #api
     if type == 'comic':
         PULLURL='http://api.comicvine.com/volume/' + str(comicid) + '/?api_key=' + str(comicapi) + '&format=xml&field_list=name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,first_issue'
     elif type == 'issue':
         if mylar.CV_ONLY:
             cv_type = 'issues'
-            searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date'
+            searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date&offset='
         else:
             cv_type = 'volume/' + str(comicid)
             searchset = 'name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description'
-        PULLURL = 'http://api.comicvine.com/' + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset)
+        PULLURL = 'http://api.comicvine.com/' + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset) + '&offset=' + str(offset)
     elif type == 'firstissue':
         #this is used ONLY for CV_ONLY
         PULLURL = 'http://api.comicvine.com/issues/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(issueid) + '&field_list=cover_date'
-
-    #import library to do http requests:
-    import urllib2
-
-    #import easy to use xml parser called minidom:
-    from xml.dom.minidom import parseString
-    #all these imports are standard on most modern python implementations
-    #download the file:
-    #first we should check to see if file is in cache to save hits to api.
-    #parsing error - will investigate later...
-    cache_path='cache/'
-    #if os.path.isfile( str(cache_path) + str(comicid) + '.xml' ) == 'True':
-    #    pass
-    #else:
-    #    f = urllib2.urlopen(PULLURL)
-    #    # write api retrieval to tmp file for caching
-    #    local_file = open(str(cache_path) + str(comicid) + '.xml', 'wb')
-    #    local_file.write(f.read())
-    #    local_file.close
-    #    f.close
-
-    #file = open(str(cache_path) + str(comicid) + '.xml', 'rb')
     file = urllib2.urlopen(PULLURL)
 
     #convert to string:
     data = file.read()
@@ -72,9 +53,40 @@ def getComic(comicid,type,issueid=None):
     #parse the xml you downloaded
     dom = parseString(data)
 
-    if type == 'comic': return GetComicInfo(comicid,dom)
-    if type == 'issue': return GetIssuesInfo(comicid,dom)
-    if type == 'firstissue': return GetFirstIssue(issueid,dom)
+    return dom
+
+
+def getComic(comicid,type,issueid=None):
+    if type == 'issue':
+        offset = 1
+        issue = {}
+        comicResults = []
+        #let's find out how many results we get from the query...
+        searched = pulldetails(comicid,'issue',None,1)
+        if searched is None: return False
+        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
+        logger.fdebug("there are " + str(totalResults) + " search results...")
+        if not totalResults:
+            return False
+        countResults = 0
+        while (countResults < int(totalResults)):
+            logger.fdebug("querying " + str(countResults))
+            if countResults > 0:
+                #new api - have to change to page # instead of offset count
+                offsetcount = countResults
+                searched = pulldetails(comicid,'issue',None,offsetcount)
+            comicResults = GetIssuesInfo(comicid,searched,issue)
+            #search results are limited to 100 and by pagination now...let's account for this.
+            countResults = countResults + 100
+
+        return issue
+
+    elif type == 'comic':
+        dom = pulldetails(comicid,'comic',None,1)
+        return GetComicInfo(comicid,dom)
+    elif type == 'firstissue':
+        dom = pulldetails(comicid,'firstissue',issueid,1)
+        return GetFirstIssue(issueid,dom)
 
 def GetComicInfo(comicid,dom):
@@ -162,7 +174,7 @@ def GetComicInfo(comicid,dom):
 #    comic['comicchoice'] = comicchoice
     return comic
 
-def GetIssuesInfo(comicid,dom):
+def GetIssuesInfo(comicid,dom,issue):
     subtracks = dom.getElementsByTagName('issue')
     if not mylar.CV_ONLY:
         cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
@@ -176,7 +188,7 @@ def GetIssuesInfo(comicid,dom):
         n = cntiss-1
     else:
         n = int(len(subtracks))-1
-    issue = {}
+#    issue = {}
     issuechoice = []
     firstdate = '2099-00-00'
     for subtrack in subtracks:
@@ -214,7 +226,7 @@ def GetIssuesInfo(comicid,dom):
             if issue['CoverDate'] < firstdate and issue['CoverDate'] != '0000-00-00':
                 firstdate = issue['CoverDate']
         n-=1
-
+    print issuechoice
     issue['issuechoice'] = issuechoice
     issue['firstdate'] = firstdate
     return issue
diff --git a/mylar/updater.py b/mylar/updater.py
index 3723819c..e9d79754 100755
--- a/mylar/updater.py
+++ b/mylar/updater.py
@@ -272,6 +272,7 @@ def forceRescan(ComicID,archive=None):
     fcnew = []
     fn = 0
     issuedupechk = []
+    issueexceptdupechk = []
     reissues = myDB.action('SELECT * FROM issues WHERE ComicID=?', [ComicID]).fetchall()
     # if filechecker returns 0 files (it doesn't find any), but some issues have a status of 'Archived'
     # the loop below won't work...let's adjust :)
@@ -347,7 +348,6 @@ def forceRescan(ComicID,archive=None):
             int_iss, iss_except = helpers.decimal_issue(reiss['Issue_Number'])
             issyear = reiss['IssueDate'][:4]
             old_status = reiss['Status']
-            #logger.fdebug("integer_issue:" + str(int_iss) + " ... status: " + str(old_status))
 
             #if comic in format of "SomeSeries 5(c2c)(2013).cbr" whatever...it'll die.
@@ -389,6 +389,7 @@ def forceRescan(ComicID,archive=None):
                 #print ("AU detected")
                 #if the 'AU' is in 005AU vs 005 AU it will yield different results.
                 fnd_iss_except = 'AU'
+                #logger.info("AU Detected - fnd_iss_except set.")
             else:
                 #fcdigit = "0"
                 fcdigit = 0
@@ -434,6 +435,7 @@ def forceRescan(ComicID,archive=None):
                     if fcnew[som][:austart].isdigit():
                         fcdigit = int(fcnew[som][:austart]) * 1000
                         fnd_iss_except = 'AU'
+                        #logger.info("iss_except set to AU")
                 #if AU is part of issue (5AU instead of 5 AU)
                 else:
                     # it's a word, skip it.
@@ -442,39 +444,43 @@ def forceRescan(ComicID,archive=None):
                 #logger.fdebug("int_iss: " + str(int_iss))
                 if "." in str(int_iss):
                     int_iss = helpers.decimal_issue(int_iss)
-                #print("this is the int issue:" + str(int_iss))
-                #print("this is the fcdigit:" + str(fcdigit))
+                #logger.fdebug("this is the int issue:" + str(int_iss))
+                #logger.fdebug("this is the fcdigit:" + str(fcdigit))
                 if int(fcdigit) == int_iss:
-                    #print ("fnd_iss_except: " + str(fnd_iss_except))
-                    #print ("iss_except: " + str(iss_except))
-                    if fnd_iss_except != 'None' and iss_except == 'AU':
+                    #logger.fdebug("fnd_iss_except: " + str(fnd_iss_except))
+                    #logger.fdebug("iss_except: " + str(iss_except))
+                    if str(fnd_iss_except) != 'None' and str(iss_except) == 'AU':
                         if fnd_iss_except.lower() == iss_except.lower():
                             logger.fdebug("matched for AU")
                         else:
-                            #logger.fdebug("this is not an AU match..ignoring result.")
+                            logger.fdebug("this is not an AU match..ignoring result.")
                             break
-                    elif fnd_iss_except == 'None' and iss_except == 'AU':break
-                    elif fnd_iss_except == 'AU' and iss_except == 'None':break
+                    elif str(fnd_iss_except) == 'None' and str(iss_except) == 'AU':break
+                    elif str(fnd_iss_except) == 'AU' and str(iss_except) == 'None':break
                     #if issyear in fcnew[som+1]:
                     #    print "matched on year:" + str(issyear)
                     #issuedupechk here.
                     #print ("fcdigit:" + str(fcdigit))
                     #print ("findiss_except:" + str(fnd_iss_except) + " = iss_except:" + str(iss_except))
-                    if int(fcdigit) in issuedupechk and str(fnd_iss_except) == str(iss_except):
-                        logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
-                        issuedupe = "yes"
-                        break
-                    #logger.fdebug("matched...issue: " + rescan['ComicName'] + "#" + str(reiss['Issue_Number']) + " --- " + str(int_iss))
-                    havefiles+=1
-                    haveissue = "yes"
-                    isslocation = str(tmpfc['ComicFilename'])
-                    issSize = str(tmpfc['ComicSize'])
-                    logger.fdebug(".......filename: " + str(isslocation))
-                    logger.fdebug(".......filesize: " + str(tmpfc['ComicSize']))
-                    # to avoid duplicate issues which screws up the count...let's store the filename issues then
-                    # compare earlier...
-                    issuedupechk.append(int(fcdigit))
+                    #if int(fcdigit) in issuedupechk and str(fnd_iss_except) not in issueexceptdupechk: #str(fnd_iss_except) == str(iss_except):
+                    for d in issuedupechk:
+                        if int(d['fcdigit']) == int(fcdigit) and d['fnd_iss_except'] == str(fnd_iss_except):
+                            logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
+                            issuedupe = "yes"
+                            break
+                    if issuedupe == "no":
+                        logger.fdebug("matched...issue: " + rescan['ComicName'] + "#" + str(reiss['Issue_Number']) + " --- " + str(int_iss))
+                        havefiles+=1
+                        haveissue = "yes"
+                        isslocation = str(tmpfc['ComicFilename'])
+                        issSize = str(tmpfc['ComicSize'])
+                        logger.fdebug(".......filename: " + str(isslocation))
+                        logger.fdebug(".......filesize: " + str(tmpfc['ComicSize']))
+                        # to avoid duplicate issues which screws up the count...let's store the filename issues then
+                        # compare earlier...
+                        issuedupechk.append({'fcdigit': int(fcdigit),
+                                             'fnd_iss_except': fnd_iss_except})
                     break
                 #else:
                 # if the issue # matches, but there is no year present - still match.
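
Note on the two fixes above, with condensed sketches in plain Python. These are
illustrative rewrites, not Mylar's actual code; helpers such as fetch_page,
parse_page and count_matches are hypothetical stand-ins.

For #308: ComicVine caps issue queries at 100 results per pull, so getComic()
in cv.py now walks the result set page by page, re-querying pulldetails() at
increasing offsets until number_of_total_results is exhausted. The loop amounts
to roughly:

    def fetch_all_issues(comic_id, fetch_page, parse_page, page_size=100):
        """fetch_page(comic_id, offset) -> minidom document for one page;
        parse_page(dom, issue) merges that page's rows into the shared dict."""
        issue = {}
        dom = fetch_page(comic_id, 0)    # first pull also reports the total
        total = int(dom.getElementsByTagName(
            'number_of_total_results')[0].firstChild.wholeText)
        parse_page(dom, issue)
        offset = page_size
        while offset < total:            # series with 100+ issues need extra pulls
            parse_page(fetch_page(comic_id, offset), issue)
            offset += page_size
        return issue

For #294/#310: forceRescan() previously deduped scanned files on the bare issue
digits alone, so "100" and "100 AU" collided. issuedupechk now stores the digits
together with the AU exception tag, and a file is only skipped when both match:

    def count_matches(scanned_files):
        """scanned_files: list of (fcdigit, fnd_iss_except) per parsed file."""
        issuedupechk = []   # was a flat list of ints: '100' and '100 AU' collided
        havefiles = 0
        for fcdigit, fnd_iss_except in scanned_files:
            if any(d['fcdigit'] == fcdigit and d['fnd_iss_except'] == fnd_iss_except
                   for d in issuedupechk):
                continue    # same digits AND same AU tag: a true duplicate
            havefiles += 1
            issuedupechk.append({'fcdigit': fcdigit,
                                 'fnd_iss_except': fnd_iss_except})
        return havefiles

    # issue 100 and 100AU count separately; a second copy of 100 does not
    # (fcdigit is the parsed number * 1000, as in forceRescan above):
    assert count_matches([(100000, 'None'), (100000, 'AU'), (100000, 'None')]) == 2
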
diff --git a/mylar/webserve.py b/mylar/webserve.py
index 3e1eb365..df8481ab 100755
--- a/mylar/webserve.py
+++ b/mylar/webserve.py
@@ -374,13 +374,37 @@ class WebInterface(object):
     def refreshArtist(self, ComicID):
         myDB = db.DBConnection()
         mismatch = "no"
-        CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [ComicID]).fetchone()
-        if CV_EXcomicid is None: pass
+        if not mylar.CV_ONLY or ComicID[:1] == "G":
+
+            CV_EXcomicid = myDB.action("SELECT * from exceptions WHERE ComicID=?", [ComicID]).fetchone()
+            if CV_EXcomicid is None: pass
+            else:
+                if CV_EXcomicid['variloop'] == '99':
+                    mismatch = "yes"
+            if ComicID[:1] == "G": threading.Thread(target=importer.GCDimport, args=[ComicID]).start()
+            else: threading.Thread(target=importer.addComictoDB, args=[ComicID,mismatch]).start()
         else:
-            if CV_EXcomicid['variloop'] == '99':
-                mismatch = "yes"
-        if ComicID[:1] == "G": threading.Thread(target=importer.GCDimport, args=[ComicID]).start()
-        else: threading.Thread(target=importer.addComictoDB, args=[ComicID,mismatch]).start()
+            if mylar.CV_ONETIMER == 1:
+                #in order to update to JUST CV_ONLY, we need to delete the issues for a given series so it's a clean slate
+                issues = myDB.select('SELECT * FROM issues WHERE ComicID=?', [ComicID])
+                #store the issues' status for a given comicid; after deleting and readding, flip the status back to what it was
+                myDB.select('DELETE FROM issues WHERE ComicID=?', [ComicID])
+                mylar.importer.addComictoDB(ComicID,mismatch)
+                issues_new = myDB.select('SELECT * FROM issues WHERE ComicID=?', [ComicID])
+                icount = 0
+                for issue in issues:
+                    for issuenew in issues_new:
+                        if issuenew['IssueID'] == issue['IssueID'] and issuenew['Status'] != issue['Status']:
+                            #change the status to the previous status
+                            ctrlVAL = {'IssueID': issue['IssueID']}
+                            newVAL = {'Status': issue['Status']}
+                            myDB.upsert("Issues", newVAL, ctrlVAL)
+                            icount+=1
+                            break
+                logger.info("changed the status of " + str(icount) + " issues.")
+            else:
+                mylar.importer.addComictoDB(ComicID,mismatch)
+
         raise cherrypy.HTTPRedirect("artistPage?ComicID=%s" % ComicID)
     refreshArtist.exposed=True
@@ -845,33 +869,37 @@ class WebInterface(object):
         arc_match = []
 
         for arc in ArcWatch:
+            print ("arc: " + str(arc['ComicName']))
             #cycle through the story arcs here for matches on the watchlist
             mod_arc = re.sub('[\:/,\'\/\-\&\%\$\#\@\!\*\+\.]', '', arc['ComicName'])
             mod_arc = re.sub(r'\s', '', mod_arc)
             for comic in Comics:
+                print ("comic: " + comic['ComicName'])
                 mod_watch = re.sub('[\:\,\'\/\-\&\%\$\#\@\!\*\+\.]', '', comic['ComicName'])
                 mod_watch = re.sub(r'\s', '', mod_watch)
-                if mod_watch == mod_arc:
+                if mod_watch == mod_arc and arc['SeriesYear'] == comic['SeriesYear']:
                     #gather the matches now.
                     arc_match.append({
                            "match_name": arc['ComicName'],
                            "match_id": comic['ComicID'],
                            "match_issue": arc['IssueNumber'],
                            "match_issuearcid": arc['IssueArcID']})
-
-        print ("we matched on " + str(len(arc_match)) + " issues")
+        logger.fdebug("arc_match: " + str(arc_match))
+        logger.fdebug("we matched on " + str(len(arc_match)) + " issues")
 
         for m_arc in arc_match:
             print m_arc
             #now we cycle through the issues looking for a match.
-            issue = myDB.select("SELECT * FROM issues where ComicID=?", [m_arc['match_id']])
-            for issuechk in issue:
-                print ("issuechk: " + str(issuechk['Issue_Number']) + "..." + str(m_arc['match_issue']))
-                if helpers.decimal_issue(issuechk['Issue_Number']) == helpers.decimal_issue(m_arc['match_issue']):
-                    logger.info("we matched on " + str(issuechk['Issue_Number']) + " for " + str(m_arc['match_name']))
-                    if issuechk['Status'] == 'Downloaded' or issuechk['Status'] == 'Archived':
+            issue = myDB.action("SELECT * FROM issues where ComicID=? and Issue_Number=?", [m_arc['match_id'],m_arc['match_issue']])
+            if issue is None: pass
+            else:
+                logger.fdebug("issue: " + str(issue['Issue_Number']) + "..." + str(m_arc['match_issue']))
+#                if helpers.decimal_issue(issuechk['Issue_Number']) == helpers.decimal_issue(m_arc['match_issue']):
+                if issue['Issue_Number'] == m_arc['match_issue']:
+                    logger.fdebug("we matched on " + str(issue['Issue_Number']) + " for " + str(m_arc['match_name']))
+                    if issue['Status'] == 'Downloaded' or issue['Status'] == 'Archived':
                         ctrlVal = {"IssueArcID": match_issuearcid }
-                        newVal = {"Status": issuechk['Status']}
+                        newVal = {"Status": issue['Status']}
                         myDB.upsert("readinglist",newVal,ctrlVal)
                         logger.info("Already have " + match_issuearcid)
                         break
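
Note on the CV_ONETIMER path in refreshArtist() above: it is a snapshot/restore
pattern -- record every issue's Status, wipe and re-add the series so the
CV_ONLY data is rebuilt cleanly, then re-apply any Status the rebuild clobbered.
A condensed sketch (db and importer stand in for Mylar's myDB and
mylar.importer; the signatures here are illustrative, not the real API):

    def refresh_series(db, importer, comic_id):
        # snapshot: IssueID -> Status before the wipe
        old = dict((row['IssueID'], row['Status']) for row in
                   db.select('SELECT * FROM issues WHERE ComicID=?', [comic_id]))
        db.action('DELETE FROM issues WHERE ComicID=?', [comic_id])
        importer.addComictoDB(comic_id, "no")   # repopulate from ComicVine
        # restore: flip back anything the rebuild changed
        changed = 0
        for row in db.select('SELECT * FROM issues WHERE ComicID=?', [comic_id]):
            prev = old.get(row['IssueID'])
            if prev is not None and prev != row['Status']:
                db.upsert('Issues', {'Status': prev}, {'IssueID': row['IssueID']})
                changed += 1
        return changed

Matching on IssueID rather than on issue number is what lets the restore
survive the renumbering and dedupe fixes above: the ComicVine issue id stays
stable even when the parsed issue number changes.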