FIX: Fixed issues with unicode conversions and the handling of special characters

evilhero 2013-03-07 20:36:36 -05:00
parent c3676e88c7
commit 88ca380940
4 changed files with 42 additions and 28 deletions

View File

@@ -29,7 +29,9 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
# use AlternateSearch to check for filenames that follow that naming pattern
# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)
# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
logger.fdebug("comic: " + watchcomic)
basedir = dir
logger.fdebug("Looking in: " + dir)
@@ -42,7 +44,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
subname = item
#print subname
subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]',' ', str(subname))
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(watchcomic))
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_watchcomic)
modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
#versioning - remove it
subsplit = subname.split()
@@ -55,7 +57,9 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
subname = re.sub('\s+', ' ', str(subname)).strip()
if AlternateSearch is not None:
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', str(AlternateSearch))
#same as above - encode to ascii to strip out special characters.
u_altsearchcomic = AlternateSearch.encode('ascii', 'ignore').strip()
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_altsearchcomic)
altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()
else:
#create random characters so it will never match.
@@ -88,6 +92,6 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
else:
pass
#print ("directory found - ignoring")
logger.fdebug("you have a total of " + str(comiccnt) + " " + str(watchcomic) + " comics")
logger.fdebug("you have a total of " + str(comiccnt) + " " + watchcomic + " comics")
watchmatch['comiccount'] = comiccnt
return watchmatch
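The heart of the listFiles() change above: watchcomic arrives as a utf-8/unicode title, and feeding it straight into str() or comparing it against on-disk filenames breaks on special characters, so it is first flattened to plain ascii with encode('ascii', 'ignore') and then run through the same punctuation and whitespace cleanup applied to the filenames. A minimal sketch of that normalization, assuming Python 2; normalize_title is a hypothetical helper, not a function in the codebase:

# -*- coding: utf-8 -*-
# Python 2 sketch: flatten the unicode series title to ascii (dropping any
# character that has no ascii mapping), then strip punctuation and collapse
# whitespace the same way listFiles() cleans the filenames it compares against.
import re

def normalize_title(watchcomic):
    u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
    cleaned = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_watchcomic)
    return re.sub('\s+', ' ', cleaned).strip()

print(normalize_title(u"Orc Stain"))      # -> Orc Stain
print(normalize_title(u"Batman: Noël"))   # -> Batman Nol (the accented char is dropped)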

View File

@@ -119,15 +119,18 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
# setup default location here
if comlocation is None:
if ':' in comic['ComicName'] or '/' in comic['ComicName'] or ',' in comic['ComicName']:
comicdir = comic['ComicName']
# let's remove the non-standard characters here.
u_comicnm = comic['ComicName']
u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname:
comicdir = u_comicname
if ':' in comicdir:
comicdir = comicdir.replace(':','')
if '/' in comicdir:
comicdir = comicdir.replace('/','-')
if ',' in comicdir:
comicdir = comicdir.replace(',','')
else: comicdir = comic['ComicName']
else: comicdir = u_comicname
series = comicdir
publisher = comic['ComicPublisher']
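addComictoDB now does the same ascii flattening before it builds the default series folder, so the ':' '/' ',' replacements operate on a plain string and no stray non-ascii character ends up in the filesystem path. A rough equivalent of that sanitization, assuming Python 2; the helper name and the loop are illustrative only (the commit uses explicit if/replace statements):

# Python 2 sketch of the default-folder sanitization: drop non-ascii first,
# then swap out the characters that are awkward or illegal in a directory name.
def sanitize_series_dir(comic_name):
    u_comicname = comic_name.encode('ascii', 'ignore').strip()
    comicdir = u_comicname
    for bad, good in ((':', ''), ('/', '-'), (',', '')):
        if bad in comicdir:
            comicdir = comicdir.replace(bad, good)
    return comicdir

print(sanitize_series_dir(u"Fables: The Wolf Among Us"))   # -> Fables The Wolf Among Us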
@@ -197,7 +200,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
try:
with open(str(coverfile)) as f:
ComicImage = os.path.join('cache',str(comicid) + ".jpg")
logger.info(u"Sucessfully retrieved cover for " + str(comic['ComicName']))
logger.info(u"Sucessfully retrieved cover for " + comic['ComicName'])
#if the comic cover local is checked, save a cover.jpg to the series folder.
if mylar.COMIC_COVER_LOCAL:
comiclocal = os.path.join(str(comlocation) + "/cover.jpg")
@@ -374,7 +377,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
if mylar.CVINFO:
if not os.path.exists(comlocation + "/cvinfo"):
with open(comlocation + "/cvinfo","w") as text_file:
text_file.write("http://www.comicvine.com/" + str(comic['ComicName']).replace(" ", "-") + "/49-" + str(comicid))
text_file.write("http://www.comicvine.com/volume/49-" + str(comicid))
logger.info(u"Updating complete for: " + comic['ComicName'])
@@ -396,7 +399,7 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
# let's check the pullist for anything at this time as well since we're here.
# do this for only Present comics....
if mylar.AUTOWANT_UPCOMING and 'Present' in gcdinfo['resultPublished']:
logger.info(u"Checking this week's pullist for new issues of " + str(comic['ComicName']))
logger.info(u"Checking this week's pullist for new issues of " + comic['ComicName'])
updater.newpullcheck(comic['ComicName'], comicid)
#here we grab issues that have been marked as wanted above...
@@ -485,15 +488,18 @@ def GCDimport(gcomicid, pullupd=None):
#comic book location on machine
# setup default location here
if comlocation is None:
if ':' in ComicName or '/' in ComicName or ',' in ComicName:
comicdir = ComicName
# let's remove the non-standard characters here.
u_comicnm = ComicName
u_comicname = u_comicnm.encode('ascii', 'ignore').strip()
if ':' in u_comicname or '/' in u_comicname or ',' in u_comicname:
comicdir = u_comicname
if ':' in comicdir:
comicdir = comicdir.replace(':','')
if '/' in comicdir:
comicdir = comicdir.replace('/','-')
if ',' in comicdir:
comicdir = comicdir.replace(',','')
else: comicdir = ComicName
else: comicdir = u_comicname
series = comicdir
publisher = ComicPublisher
@@ -551,7 +557,7 @@ def GCDimport(gcomicid, pullupd=None):
try:
with open(str(coverfile)) as f:
ComicImage = "cache/" + str(gcomicid) + ".jpg"
logger.info(u"Sucessfully retrieved cover for " + str(ComicName))
logger.info(u"Sucessfully retrieved cover for " + ComicName)
except IOError as e:
logger.error(u"Unable to save cover locally at this time.")
@@ -686,14 +692,14 @@ def GCDimport(gcomicid, pullupd=None):
if mylar.CVINFO:
if not os.path.exists(comlocation + "/cvinfo"):
with open(comlocation + "/cvinfo","w") as text_file:
text_file.write("http://www.comicvine.com/" + str(comic['ComicName']).replace(" ", "-") + "/49-" + str(comicid))
text_file.write("http://www.comicvine.com/volume/49-" + str(comicid))
logger.info(u"Updating complete for: " + ComicName)
if pullupd is None:
# let's check the pullist for anything at this time as well since we're here.
if mylar.AUTOWANT_UPCOMING and 'Present' in ComicPublished:
logger.info(u"Checking this week's pullist for new issues of " + str(ComicName))
logger.info(u"Checking this week's pullist for new issues of " + ComicName)
updater.newpullcheck(comic['ComicName'], gcomicid)
#here we grab issues that have been marked as wanted above...
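Most of the remaining logger changes in this commit simply drop str() around names that are already unicode. Under Python 2, str() on a unicode object implicitly encodes it with the ascii codec, so any accented or otherwise special character raises UnicodeEncodeError; concatenating the unicode value into a u"" message, as the logger calls now do, keeps the character intact. A small demonstration of the failure mode, assuming Python 2:

# -*- coding: utf-8 -*-
# Why the str() wrappers around comic names were removed (Python 2 semantics).
comic_name = u"Astérix"

try:
    message = u"Updating complete for: " + str(comic_name)   # str() == ascii encode
except UnicodeEncodeError as err:
    print("str() failed on the accented character: %s" % err)

# Concatenating the unicode value directly keeps the whole message as unicode
# and preserves the special character.
message = u"Updating complete for: " + comic_name
print(message.encode('utf-8'))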

View File

@@ -88,7 +88,7 @@ def upcoming_update(ComicID, ComicName, IssueNumber, IssueDate):
issuechk = myDB.action("SELECT * FROM issues WHERE ComicID=? AND Issue_Number=?", [ComicID, IssueNumber]).fetchone()
if issuechk is None:
logger.fdebug(str(ComicName) + " Issue: " + str(IssueNumber) + " not present in listings to mark for download...updating comic and adding to Upcoming Wanted Releases.")
logger.fdebug(ComicName + " Issue: " + str(IssueNumber) + " not present in listings to mark for download...updating comic and adding to Upcoming Wanted Releases.")
# we need to either decrease the total issue count, OR indicate that an issue is upcoming.
upco_results = myDB.action("SELECT COUNT(*) FROM UPCOMING WHERE ComicID=?",[ComicID]).fetchall()
upco_iss = upco_results[0][0]
@@ -106,7 +106,7 @@ def upcoming_update(ComicID, ComicName, IssueNumber, IssueDate):
if hours > 5:
pullupd = "yes"
logger.fdebug("Now Refreshing comic " + str(ComicName) + " to make sure it's up-to-date")
logger.fdebug("Now Refreshing comic " + ComicName + " to make sure it's up-to-date")
if ComicID[:1] == "G": mylar.importer.GCDimport(ComicID,pullupd)
else: mylar.importer.addComictoDB(ComicID,mismatch,pullupd)
else:
@@ -226,14 +226,14 @@ def foundsearch(ComicID, IssueID):
myDB.upsert("snatched", newsnatchValues, snatchedupdate)
#print ("finished updating snatched db.")
logger.info(u"Updating now complete for " + str(comic['ComicName']) + " issue: " + str(issue['Issue_Number']))
logger.info(u"Updating now complete for " + comic['ComicName'] + " issue: " + str(issue['Issue_Number']))
return
def forceRescan(ComicID,archive=None):
myDB = db.DBConnection()
# file check to see if issue exists
rescan = myDB.action('SELECT * FROM comics WHERE ComicID=?', [ComicID]).fetchone()
logger.info(u"Now checking files for " + str(rescan['ComicName']) + " (" + str(rescan['ComicYear']) + ") in " + str(rescan['ComicLocation']) )
logger.info(u"Now checking files for " + rescan['ComicName'] + " (" + str(rescan['ComicYear']) + ") in " + str(rescan['ComicLocation']) )
if archive is None:
fc = filechecker.listFiles(dir=rescan['ComicLocation'], watchcomic=rescan['ComicName'], AlternateSearch=rescan['AlternateSearch'])
else:
@@ -413,7 +413,7 @@ def forceRescan(ComicID,archive=None):
logger.fdebug("duplicate issue detected - not counting this: " + str(tmpfc['ComicFilename']))
issuedupe = "yes"
break
logger.fdebug("matched...issue: " + str(rescan['ComicName']) + " --- " + str(int_iss))
logger.fdebug("matched...issue: " + rescan['ComicName'] + " --- " + str(int_iss))
havefiles+=1
haveissue = "yes"
isslocation = str(tmpfc['ComicFilename'])

View File

@@ -225,13 +225,14 @@ class WebInterface(object):
addComic.exposed = True
def from_Exceptions(self, comicid, gcdid, comicname=None, comicyear=None, comicissues=None, comicpublisher=None):
import unicodedata
mismatch = "yes"
#print ("gcdid:" + str(gcdid))
#write it to the custom_exceptions.csv and reload it so that importer will pick it up and do its thing :)
#custom_exceptions in this format...
#99, (comicid), (gcdid), none
logger.info("saving new information into custom_exceptions.csv...")
except_info = "none #" + comicname.decode('utf-8', 'replace') + "-(" + str(comicyear) + ")"
except_info = "none #" + str(comicname) + "-(" + str(comicyear) + ")"
except_file = os.path.join(mylar.DATA_DIR,"custom_exceptions.csv")
if not os.path.exists(except_file):
try:
@@ -240,9 +241,12 @@ class WebInterface(object):
except (OSError,IOError):
logger.error("Could not locate " + str(except_file) + " file. Make sure it's in datadir: " + mylar.DATA_DIR + " with proper permissions.")
return
exceptln = "99," + str(comicid) + "," + str(gcdid) + "," + str(except_info)
exceptline = exceptln.decode('utf-8','ignore')
with open(str(except_file), 'a') as f:
f.write('%s,%s,%s,%s\n' % ("99", str(comicid), str(gcdid), str(except_info)) )
#f.write('%s,%s,%s,%s\n' % ("99", comicid, gcdid, except_info)
f.write(exceptline.encode('ascii','replace').strip())
logger.info("re-loading csv file so it's all nice and current.")
mylar.csv_load()
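The custom_exceptions.csv append now builds the whole line first, decodes it leniently as utf-8, and then forces it down to ascii with 'replace' before writing, so a title with special characters can no longer corrupt the file. A sketch of that round-trip, assuming Python 2; the id values below are placeholders, not real ComicVine/GCD ids:

# -*- coding: utf-8 -*-
# Python 2 sketch of the decode/encode round-trip used for the exceptions line.
comicid = "2127"        # placeholder id
gcdid = "12345"         # placeholder id
except_info = "none #" + "Astérix" + "-(1959)"

exceptln = "99," + comicid + "," + gcdid + "," + except_info
exceptline = exceptln.decode('utf-8', 'ignore')              # bytes -> unicode, drop junk
ascii_line = exceptline.encode('ascii', 'replace').strip()   # unicode -> ascii, é becomes '?'

print(ascii_line)   # 99,2127,12345,none #Ast?rix-(1959)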
@@ -316,7 +320,7 @@ class WebInterface(object):
comic = myDB.action('SELECT * from comics WHERE ComicID=?', [ComicID]).fetchone()
if comic['ComicName'] is None: ComicName = "None"
else: ComicName = comic['ComicName']
logger.info(u"Deleting all traces of Comic: " + str(ComicName))
logger.info(u"Deleting all traces of Comic: " + ComicName))
myDB.action('DELETE from comics WHERE ComicID=?', [ComicID])
myDB.action('DELETE from issues WHERE ComicID=?', [ComicID])
myDB.action('DELETE from upcoming WHERE ComicID=?', [ComicID])
@@ -588,7 +592,7 @@ class WebInterface(object):
else:
logger.info("Not renaming " + str(filename) + " as it is in desired format already.")
#continue
logger.info("I have renamed " + str(filefind) + " issues of " + str(comicname))
logger.info("I have renamed " + str(filefind) + " issues of " + comicname)
manualRename.exposed = True
def searchScan(self, name):
@@ -763,7 +767,7 @@ class WebInterface(object):
for book_element in tracks:
st_issueid = str(storyarcid) + "_" + str(random.randint(1000,9999))
comicname = book_element.getAttribute('Series')
print ("comic: " + str(comicname))
print ("comic: " + comicname)
comicnumber = book_element.getAttribute('Number')
print ("number: " + str(comicnumber))
comicvolume = book_element.getAttribute('Volume')
@@ -976,7 +980,7 @@ class WebInterface(object):
def deleteimport(self, ComicName):
myDB = db.DBConnection()
logger.info("Removing import data for Comic: " + str(ComicName))
logger.info("Removing import data for Comic: " + ComicName)
myDB.action('DELETE from importresults WHERE ComicName=?', [ComicName])
raise cherrypy.HTTPRedirect("importResults")
deleteimport.exposed = True
@@ -1067,7 +1071,7 @@ class WebInterface(object):
if splitt[1:].isdigit():
print (splitt + " - assuming versioning. Removing from initial search pattern.")
ComicName = re.sub(str(splitt), '', ComicName)
print ("new comicname is : " + str(ComicName))
print ("new comicname is : " + ComicName)
# we need to pass the original comicname here into the entire importer module
# so that we can reference the correct issues later.
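For context, the block above strips a trailing version token (a "V" followed by digits) from the search pattern while the untouched name is still handed to the importer, so issue lookups keep matching. A simplified illustration of that strip, assuming Python 2 and hypothetical names:

# Python 2 sketch of the version-token strip: drop a token like "V2" from the
# search pattern, but keep the original name around for the importer.
import re

def strip_version(comic_name):
    original = comic_name
    for splitt in comic_name.split():
        if splitt[0].upper() == 'V' and splitt[1:].isdigit():
            comic_name = re.sub(re.escape(splitt), '', comic_name)
            comic_name = re.sub('\s+', ' ', comic_name).strip()
    return comic_name, original

print(strip_version("Invincible V2"))   # -> ('Invincible', 'Invincible V2')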