FIX: Fixed issues with unicode conversions and the handling of special characters

evilhero 2013-03-07 20:36:36 -05:00
parent c3676e88c7
commit 88ca380940
4 changed files with 42 additions and 28 deletions

View File

@@ -29,7 +29,9 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
# use AlternateSearch to check for filenames that follow that naming pattern
# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)
# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
logger.fdebug("comic: " + watchcomic)
basedir = dir
logger.fdebug("Looking in: " + dir)
@@ -42,7 +44,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
subname = item
#print subname
subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]',' ', str(subname))
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(watchcomic))
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_watchcomic)
modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
#versioning - remove it
subsplit = subname.split()
@@ -55,7 +57,9 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
subname = re.sub('\s+', ' ', str(subname)).strip()
if AlternateSearch is not None:
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(AlternateSearch))
#same as above - encode to ascii to strip out special characters.
u_altsearchcomic = AlternateSearch.encode('ascii', 'ignore').strip()
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_altsearchcomic)
altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()
else:
#create random characters so it will never match.
@@ -88,6 +92,6 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
else:
pass
#print ("directory found - ignoring")
logger.fdebug("you have a total of " + str(comiccnt) + " " + str(watchcomic) + " comics")
logger.fdebug("you have a total of " + str(comiccnt) + " " + watchcomic + " comics")
watchmatch['comiccount'] = comiccnt
return watchmatch
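The heart of the listFiles() change above: watchcomic arrives as a utf-8/unicode title, and feeding it straight into str() or comparing it against on-disk filenames breaks on special characters, so it is first flattened to plain ascii with encode('ascii', 'ignore') and then run through the same punctuation and whitespace cleanup applied to the filenames. A minimal sketch of that normalization, assuming Python 2; normalize_title is a hypothetical helper, not a function in the codebase:

# -*- coding: utf-8 -*-
# Python 2 sketch: flatten the unicode series title to ascii (dropping any
# character that has no ascii mapping), then strip punctuation and collapse
# whitespace the same way listFiles() cleans the filenames it compares against.
import re

def normalize_title(watchcomic):
    u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
    cleaned = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_watchcomic)
    return re.sub('\s+', ' ', cleaned).strip()

print(normalize_title(u"Orc Stain"))      # -> Orc Stain
print(normalize_title(u"Batman: Noël"))   # -> Batman Nol (the accented char is dropped)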

View File

@@ -119,15 +119,18 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
# setup default location here
if comlocation is None:
if ':' in comic['ComicName'] or '/' in comic['ComicName'] or ',' in comic['ComicName']:
comicdir = comic['ComicName']
# let's remove the non-standard characters here.
u_comicnm = comic['ComicName']
u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname:
comicdir = u_comicname
if ':' in comicdir:
comicdir = comicdir.replace(':','')
if '/' in comicdir:
comicdir = comicdir.replace('/','-')
if ',' in comicdir:
comicdir = comicdir.replace(',','')
else: comicdir = comic['ComicName']
else: comicdir = u_comicname
series = comicdir
publisher = comic['ComicPublisher']
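addComictoDB now does the same ascii flattening before it builds the default series folder, so the ':' '/' ',' replacements operate on a plain string and no stray non-ascii character ends up in the filesystem path. A rough equivalent of that sanitization, assuming Python 2; the helper name and the loop are illustrative only (the commit uses explicit if/replace statements):

# Python 2 sketch of the default-folder sanitization: drop non-ascii first,
# then swap out the characters that are awkward or illegal in a directory name.
def sanitize_series_dir(comic_name):
    u_comicname = comic_name.encode('ascii', 'ignore').strip()
    comicdir = u_comicname
    for bad, good in ((':', ''), ('/', '-'), (',', '')):
        if bad in comicdir:
            comicdir = comicdir.replace(bad, good)
    return comicdir

print(sanitize_series_dir(u"Fables: The Wolf Among Us"))   # -> Fables The Wolf Among Us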
@@ -197,7 +200,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
try:
with open(str(coverfile)) as f:
ComicImage = os.path.join('cache',str(comicid) + ".jpg")
logger.info(u"Sucessfully retrieved cover for " + str(comic['ComicName']))
logger.info(u"Sucessfully retrieved cover for " + comic['ComicName'])
#if the comic cover local is checked, save a cover.jpg to the series folder.
if mylar.COMIC_COVER_LOCAL:
comiclocal = os.path.join(str(comlocation) + "/cover.jpg")
@@ -374,7 +377,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
if mylar.CVINFO:
if not os.path.exists(comlocation + "/cvinfo"):
with open(comlocation + "/cvinfo","w") as text_file:
text_file.write("http://www.comicvine.com/" + str(comic['ComicName']).replace(" ", "-") + "/49-" + str(comicid))
text_file.write("http://www.comicvine.com/volume/49-" + str(comicid))
logger.info(u"Updating complete for: " + comic['ComicName'])
@@ -396,7 +399,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
# let's check the pullist for anything at this time as well since we're here.
# do this for only Present comics....
if mylar.AUTOWANT_UPCOMING and 'Present' in gcdinfo['resultPublished']:
logger.info(u"Checking this week's pullist for new issues of " + str(comic['ComicName']))
logger.info(u"Checking this week's pullist for new issues of " + comic['ComicName'])
updater.newpullcheck(comic['ComicName'], comicid)
#here we grab issues that have been marked as wanted above...
@@ -485,15 +488,18 @@ def GCDimport(gcomicid, pullupd=None):
#comic book location on machine
# setup default location here
if comlocation is None:
if ':' in ComicName or '/' in ComicName or ',' in ComicName:
comicdir = ComicName
# let's remove the non-standard characters here.
u_comicnm = ComicName
u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname:
comicdir = u_comicname
if ':' in comicdir:
comicdir = comicdir.replace(':','')
if '/' in comicdir:
comicdir = comicdir.replace('/','-')
if ',' in comicdir:
comicdir = comicdir.replace(',','')
else: comicdir = ComicName
else: comicdir = u_comicname
series = comicdir
publisher = ComicPublisher
@@ -551,7 +557,7 @@ def GCDimport(gcomicid, pullupd=None):
try:
with open(str(coverfile)) as f:
ComicImage = "cache/" + str(gcomicid) + ".jpg"
logger.info(u"Sucessfully retrieved cover for " + str(ComicName))
logger.info(u"Sucessfully retrieved cover for " + ComicName)
except IOError as e:
logger.error(u"Unable to save cover locally at this time.")
@@ -686,14 +692,14 @@ def GCDimport(gcomicid, pullupd=None):
if mylar.CVINFO:
if not os.path.exists(comlocation + "/cvinfo"):
with open(comlocation + "/cvinfo","w") as text_file:
text_file.write("http://www.comicvine.com/" + str(comic['ComicName']).replace(" ", "-") + "/49-" + str(comicid))
text_file.write("http://www.comicvine.com/volume/49-" + str(comicid))
logger.info(u"Updating complete for: " + ComicName)
if pullupd is None:
# let's check the pullist for anything at this time as well since we're here.
if mylar.AUTOWANT_UPCOMING and 'Present' in ComicPublished:
logger.info(u"Checking this week's pullist for new issues of " + str(ComicName))
logger.info(u"Checking this week's pullist for new issues of " + ComicName)
updater.newpullcheck(comic['ComicName'], gcomicid)
#here we grab issues that have been marked as wanted above...
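Most of the remaining logger changes in this commit simply drop str() around names that are already unicode. Under Python 2, str() on a unicode object implicitly encodes it with the ascii codec, so any accented or otherwise special character raises UnicodeEncodeError; concatenating the unicode value into a u"" message, as the logger calls now do, keeps the character intact. A small demonstration of the failure mode, assuming Python 2:

# -*- coding: utf-8 -*-
# Why the str() wrappers around comic names were removed (Python 2 semantics).
comic_name = u"Astérix"

try:
    message = u"Updating complete for: " + str(comic_name)   # str() == ascii encode
except UnicodeEncodeError as err:
    print("str() failed on the accented character: %s" % err)

# Concatenating the unicode value directly keeps the whole message as unicode
# and preserves the special character.
message = u"Updating complete for: " + comic_name
print(message.encode('utf-8'))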

View File

@@ -88,7 +88,7 @@ def upcoming_update(ComicID, ComicName, IssueNumber, IssueDate):
issuechk = myDB.action("SELECT * FROM issues WHERE ComicID=? AND Issue_Number=?", [ComicID, IssueNumber]).fetchone()
if issuechk is None:
logger.fdebug(str(ComicName) + " Issue: " + str(IssueNumber) + " not present in listings to mark for download...updating comic and adding to Upcoming Wanted Releases.")
logger.fdebug(ComicName + " Issue: " + str(IssueNumber) + " not present in listings to mark for download...updating comic and adding to Upcoming Wanted Releases.")
# we need to either decrease the total issue count, OR indicate that an issue is upcoming.
upco_results = myDB.action("SELECT COUNT(*) FROM UPCOMING WHERE ComicID=?",[ComicID]).fetchall()
upco_iss = upco_results[0][0]
@@ -106,7 +106,7 @@ def upcoming_update(ComicID, ComicName, IssueNumber, IssueDate):
if hours > 5:
pullupd = "yes"
logger.fdebug("Now Refreshing comic " + str(ComicName) + " to make sure it's up-to-date")
logger.fdebug("Now Refreshing comic " + ComicName + " to make sure it's up-to-date")
if ComicID[:1] == "G": mylar.importer.GCDimport(ComicID,pullupd)
else: mylar.importer.addComictoDB(ComicID,mismatch,pullupd)
else:
@@ -226,14 +226,14 @@ def foundsearch(ComicID, IssueID):
myDB.upsert("snatched", newsnatchValues, snatchedupdate)
#print ("finished updating snatched db.")
logger.info(u"Updating now complete for " + str(comic['ComicName']) + " issue: " + str(issue['Issue_Number']))
logger.info(u"Updating now complete for " + comic['ComicName'] + " issue: " + str(issue['Issue_Number']))
return
def forceRescan(ComicID,archive=None):
myDB = db.DBConnection()
# file check to see if issue exists
rescan = myDB.action('SELECT * FROM comics WHERE ComicID=?', [ComicID]).fetchone()
logger.info(u"Now checking files for " + str(rescan['ComicName']) + " (" + str(rescan['ComicYear']) + ") in " + str(rescan['ComicLocation']) )
logger.info(u"Now checking files for " + rescan['ComicName'] + " (" + str(rescan['ComicYear']) + ") in " + str(rescan['ComicLocation']) )
if archive is None:
fc = filechecker.listFiles(dir=rescan['ComicLocation'], watchcomic=rescan['ComicName'], AlternateSearch=rescan['AlternateSearch'])
else:
@@ -413,7 +413,7 @@ def forceRescan(ComicID,archive=None):
logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
issuedupe = "yes"
break
logger.fdebug("matched...issue: " + str(rescan['ComicName']) + " --- " + str(int_iss))
logger.fdebug("matched...issue: " + rescan['ComicName'] + " --- " + str(int_iss))
havefiles+=1
haveissue = "yes"
isslocation = str(tmpfc['ComicFilename'])

View File

@@ -225,13 +225,14 @@ class WebInterface(object):
addComic.exposed = True
def from_Exceptions(self, comicid, gcdid, comicname=None, comicyear=None, comicissues=None, comicpublisher=None):
import unicodedata
mismatch = "yes"
#print ("gcdid:" + str(gcdid))
#write it to the custom_exceptions.csv and reload it so that importer will pick it up and do its thing :)
#custom_exceptions in this format...
#99, (comicid), (gcdid), none
logger.info("saving new information into custom_exceptions.csv...")
except_info = "none #" + comicname.decode('utf-8', 'replace') + "-(" + str(comicyear) + ")"
except_info = "none #" + str(comicname) + "-(" + str(comicyear) + ")"
except_file = os.path.join(mylar.DATA_DIR,"custom_exceptions.csv")
if not os.path.exists(except_file):
try:
@@ -240,9 +241,12 @@ class WebInterface(object):
except (OSError,IOError):
logger.error("Could not locate " + str(except_file) + " file. Make sure it's in datadir: " + mylar.DATA_DIR + " with proper permissions.")
return
exceptln = "99," + str(comicid) + "," + str(gcdid) + "," + str(except_info)
exceptline = exceptln.decode('utf-8','ignore')
with open(str(except_file), 'a') as f:
f.write('%s,%s,%s,%s\n' % ("99", str(comicid), str(gcdid), str(except_info)) )
#f.write('%s,%s,%s,%s\n' % ("99", comicid, gcdid, except_info)
f.write(exceptline.encode('ascii','replace').strip())
logger.info("re-loading csv file so it's all nice and current.")
mylar.csv_load()
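The custom_exceptions.csv append now builds the whole line first, decodes it leniently as utf-8, and then forces it down to ascii with 'replace' before writing, so a title with special characters can no longer corrupt the file. A sketch of that round-trip, assuming Python 2; the id values below are placeholders, not real ComicVine/GCD ids:

# -*- coding: utf-8 -*-
# Python 2 sketch of the decode/encode round-trip used for the exceptions line.
comicid = "2127"        # placeholder id
gcdid = "12345"         # placeholder id
except_info = "none #" + "Astérix" + "-(1959)"

exceptln = "99," + comicid + "," + gcdid + "," + except_info
exceptline = exceptln.decode('utf-8', 'ignore')              # bytes -> unicode, drop junk
ascii_line = exceptline.encode('ascii', 'replace').strip()   # unicode -> ascii, é becomes '?'

print(ascii_line)   # 99,2127,12345,none #Ast?rix-(1959)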
@@ -316,7 +320,7 @@ class WebInterface(object):
comic = myDB.action('SELECT * from comics WHERE ComicID=?', [ComicID]).fetchone()
if comic['ComicName'] is None: ComicName = "None"
else: ComicName = comic['ComicName']
logger.info(u"Deleting all traces of Comic: " + str(ComicName))
logger.info(u"Deleting all traces of Comic: " + ComicName))
myDB.action('DELETE from comics WHERE ComicID=?', [ComicID])
myDB.action('DELETE from issues WHERE ComicID=?', [ComicID])
myDB.action('DELETE from upcoming WHERE ComicID=?', [ComicID])
@@ -588,7 +592,7 @@ class WebInterface(object):
else:
logger.info("Not renaming " + str(filename) + " as it is in desired format already.")
#continue
logger.info("I have renamed " + str(filefind) + " issues of " + str(comicname))
logger.info("I have renamed " + str(filefind) + " issues of " + comicname)
manualRename.exposed = True
def searchScan(self, name):
@@ -763,7 +767,7 @@ class WebInterface(object):
for book_element in tracks:
st_issueid = str(storyarcid) + "_" + str(random.randint(1000,9999))
comicname = book_element.getAttribute('Series')
print ("comic: " + str(comicname))
print ("comic: " + comicname)
comicnumber = book_element.getAttribute('Number')
print ("number: " + str(comicnumber))
comicvolume = book_element.getAttribute('Volume')
@@ -976,7 +980,7 @@ class WebInterface(object):
def deleteimport(self, ComicName):
myDB = db.DBConnection()
logger.info("Removing import data for Comic: " + str(ComicName))
logger.info("Removing import data for Comic: " + ComicName)
myDB.action('DELETE from importresults WHERE ComicName=?', [ComicName])
raise cherrypy.HTTPRedirect("importResults")
deleteimport.exposed = True
@@ -1067,7 +1071,7 @@ class WebInterface(object):
if splitt[1:].isdigit():
print (splitt + " - assuming versioning. Removing from initial search pattern.")
ComicName = re.sub(str(splitt), '', ComicName)
print ("new comicname is : " + str(ComicName))
print ("new comicname is : " + ComicName)
# we need to pass the original comicname here into the entire importer module
# so that we can reference the correct issues later.
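For context, the block above strips a trailing version token (a "V" followed by digits) from the search pattern while the untouched name is still handed to the importer, so issue lookups keep matching. A simplified illustration of that strip, assuming Python 2 and hypothetical names:

# Python 2 sketch of the version-token strip: drop a token like "V2" from the
# search pattern, but keep the original name around for the importer.
import re

def strip_version(comic_name):
    original = comic_name
    for splitt in comic_name.split():
        if splitt[0].upper() == 'V' and splitt[1:].isdigit():
            comic_name = re.sub(re.escape(splitt), '', comic_name)
            comic_name = re.sub('\s+', ' ', comic_name).strip()
    return comic_name, original

print(strip_version("Invincible V2"))   # -> ('Invincible', 'Invincible V2')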