FIX:(#241) UnicodeDecodeError when UEC would return results that couldn't decode properly.

This commit is contained in:
evilhero 2013-03-01 20:41:45 -05:00
parent c3faf642a7
commit 1c1e4cb96e
2 changed files with 65 additions and 9 deletions

View File

@ -24,6 +24,7 @@ import sys
from decimal import Decimal
from HTMLParser import HTMLParseError
from time import strptime
import mylar
def GCDScraper(ComicName, ComicYear, Total, ComicID, quickmatch=None):
NOWyr = datetime.date.today().year
@ -641,11 +642,16 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
n_odd+=1
resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd]
rtp = resultp('a')[1]
resultName.append(helpers.cleanName(rtp.findNext(text=True)))
rtpit = rtp.findNext(text=True)
rtpthis = rtpit.encode('utf-8').strip()
resultName.append(helpers.cleanName(rtpthis))
# print ( "Comic Name: " + str(resultName[n]) )
pub = resultp('a')[0]
resultPublisher.append(pub.findNext(text=True))
pubit = pub.findNext(text=True)
# pubthis = u' '.join(pubit).encode('utf-8').strip()
pubthis = pubit.encode('utf-8').strip()
resultPublisher.append(pubthis)
# print ( "Publisher: " + str(resultPublisher[n]) )
fip = resultp('a',href=True)[1]
@ -668,10 +674,10 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
#print ( str(resultID[n]) + " not in DB...adding.")
comchkchoice.append({
"ComicID": str(comicid),
"ComicName": str(resultName[n]),
"ComicName": resultName[n],
"GCDID": str(resultID[n]).split('/')[2],
"ComicYear" : str(resultYear[n]),
"ComicPublisher" : str(resultPublisher[n]),
"ComicPublisher" : resultPublisher[n],
"ComicURL" : "http://www.comics.org" + str(resultID[n]),
"ComicIssues" : str(resultIssues[n])
})
@ -684,7 +690,7 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
return comchoice, totalcount
def decode_html(html_string):
converted = UnicodeDammit(html_string, isHTML=True)
converted = UnicodeDammit(html_string)
if not converted.unicode:
raise UnicodeDecodeError(
"Failed to detect encoding, tried [%s]",

View File

@ -178,11 +178,11 @@ class WebInterface(object):
break
cresults.append({
'ComicID' : stoopie['ComicID'],
'ComicName' : stoopie['ComicName'],
'ComicName' : stoopie['ComicName'].decode('utf-8', 'replace'),
'ComicYear' : stoopie['ComicYear'],
'ComicIssues' : stoopie['ComicIssues'],
'ComicURL' : stoopie['ComicURL'],
'ComicPublisher' : stoopie['ComicPublisher'],
'ComicPublisher' : stoopie['ComicPublisher'].decode('utf-8', 'replace'),
'GCDID' : stoopie['GCDID']
})
i+=1
@ -671,11 +671,17 @@ class WebInterface(object):
# Handler that queries the database and renders a reading-list template;
# it is marked as exposed on the last line of this block.
# NOTE(review): these lines come from a diff view whose +/- markers were lost.
# The first select/return pair below looks like the pre-change version and the
# second pair like its replacement -- confirm against the repository before
# treating all of these statements as one function body.
def readlist(self):
myDB = db.DBConnection()
readlist = myDB.select("SELECT * from readlist order by DateAdded DESC")
return serve_template(templatename="readlist.html", title="Readlist", readlist=readlist)
# Second variant: selects from "readinglist", grouped by StoryArcID, and
# renders "readinglist.html" instead of "readlist.html".
readlist = myDB.select("SELECT * from readinglist group by StoryArcID COLLATE NOCASE")
return serve_template(templatename="readinglist.html", title="Readlist", readlist=readlist)
# NOTE(review): `page` is never assigned in the visible lines and this
# statement follows another return -- presumably leftover code; verify.
return page
readlist.exposed = True
def detailReadlist(self, StoryArcID, StoryArcName):
    """Render the entries of a single story arc in reading order.

    StoryArcID   -- value matched against the StoryArcID column of the
                    ``readinglist`` table.
    StoryArcName -- handed to the template unchanged as ``storyarcname``.

    Returns whatever ``serve_template`` produces for ``readlist.html``.
    """
    query = "SELECT * from readinglist WHERE StoryArcID=? order by ReadingOrder ASC"
    # The arc id is bound as a query parameter, never spliced into the SQL.
    arc_rows = db.DBConnection().select(query, [StoryArcID])
    return serve_template(
        templatename="readlist.html",
        title="Detailed Arc list",
        readlist=arc_rows,
        storyarcname=StoryArcName
    )
detailReadlist.exposed = True
def addtoreadlist(self, IssueID):
myDB = db.DBConnection()
readlist = myDB.action("SELECT * from issues where IssueID=?", [IssueID]).fetchone()
@ -693,6 +699,50 @@ class WebInterface(object):
raise cherrypy.HTTPRedirect("artistPage?ComicID=%s" % readlist['ComicID'])
addtoreadlist.exposed = True
def importReadlist(self, filename):
    """Import a story arc from an XML reading-list file into the database.

    The file is parsed with minidom. The text of the first <Name> element is
    used as the story-arc name, and every <Book> element becomes one row
    upserted into the ``readinglist`` table, numbered in document order
    starting at 1.

    filename -- path of the XML file to read (passed through str()).

    Raises whatever open()/parseString() raise for an unreadable or malformed
    file, and IndexError/AttributeError when the document has no usable
    <Name> element.
    """
    from xml.dom.minidom import parseString
    import random

    myDB = db.DBConnection()

    # 'with' guarantees the handle is closed even when read() raises.
    with open(str(filename)) as listfile:
        data = listfile.read()
    dom = parseString(data)

    storyarc = dom.getElementsByTagName('Name')[0].firstChild.wholeText
    tracks = dom.getElementsByTagName('Book')
    total = len(tracks)

    # minidom hands back unicode objects; str() on one holding non-ASCII
    # characters raises UnicodeEncodeError, so every value is encoded to
    # UTF-8 explicitly before it is concatenated for the debug output.
    print ("there are " + str(total) + " issues in the story-arc: "
           + storyarc.encode('utf-8', 'replace'))

    # generate a random number for the ID, and tack on the total issue count
    # to the end as a str
    storyarcid = str(random.randint(1000, 9999)) + str(total)

    # NOTE(review): the per-issue suffix is random as well, so two books in
    # the same arc can draw the same IssueArcID and the later upsert would
    # then overwrite the earlier row.
    for order, book_element in enumerate(tracks, 1):
        st_issueid = str(storyarcid) + "_" + str(random.randint(1000, 9999))
        comicname = book_element.getAttribute('Series')
        print ("comic: " + comicname.encode('utf-8', 'replace'))
        comicnumber = book_element.getAttribute('Number')
        print ("number: " + comicnumber.encode('utf-8', 'replace'))
        comicvolume = book_element.getAttribute('Volume')
        print ("volume: " + comicvolume.encode('utf-8', 'replace'))
        comicyear = book_element.getAttribute('Year')
        print ("year: " + comicyear.encode('utf-8', 'replace'))

        CtrlVal = {"IssueArcID": st_issueid}
        NewVals = {"StoryArcID": storyarcid,
                   "ComicName": comicname,
                   "IssueNumber": comicnumber,
                   "SeriesYear": comicvolume,
                   "IssueYear": comicyear,
                   "StoryArc": storyarc,
                   "ReadingOrder": order,
                   "TotalIssues": total}
        myDB.upsert("readinglist", NewVals, CtrlVal)
importReadlist.exposed = True
def logs(self):
if mylar.LOG_LEVEL is None or mylar.LOG_LEVEL == '':