FIX:(#241) UnicodeDecodeError when UEC would return results that couldn't decode properly.

2013-03-01 20:41:45 -05:00 · 2013-03-01 20:41:45 -05:00 · 1c1e4cb96e
parent c3faf642a7
commit 1c1e4cb96e
2 changed files with 65 additions and 9 deletions
--- a/mylar/parseit.py
+++ b/mylar/parseit.py
@ -24,6 +24,7 @@ import sys
 from decimal import Decimal 
 from HTMLParser import HTMLParseError
 from time import strptime
+import mylar

 def GCDScraper(ComicName, ComicYear, Total, ComicID, quickmatch=None):
    NOWyr = datetime.date.today().year
@ -641,11 +642,16 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
                n_odd+=1
                resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd]
            rtp = resultp('a')[1]
-            resultName.append(helpers.cleanName(rtp.findNext(text=True)))
+            rtpit = rtp.findNext(text=True)
+            rtpthis = rtpit.encode('utf-8').strip()
+            resultName.append(helpers.cleanName(rtpthis))
 #            print ( "Comic Name: " + str(resultName[n]) )

            pub = resultp('a')[0]
-            resultPublisher.append(pub.findNext(text=True))
+            pubit = pub.findNext(text=True)
+#            pubthis = u' '.join(pubit).encode('utf-8').strip()
+            pubthis = pubit.encode('utf-8').strip()
+            resultPublisher.append(pubthis)
 #            print ( "Publisher: " + str(resultPublisher[n]) )

            fip = resultp('a',href=True)[1]
@ -668,10 +674,10 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
                #print ( str(resultID[n]) + " not in DB...adding.")
                comchkchoice.append({
                       "ComicID":         str(comicid),
-                       "ComicName":       str(resultName[n]),
+                       "ComicName":       resultName[n],
                       "GCDID":           str(resultID[n]).split('/')[2],
                       "ComicYear" :      str(resultYear[n]),
-                       "ComicPublisher" : str(resultPublisher[n]),
+                       "ComicPublisher" : resultPublisher[n],
                       "ComicURL" :       "http://www.comics.org" + str(resultID[n]),
                       "ComicIssues" :    str(resultIssues[n])
                      })
@ -684,7 +690,7 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
    return comchoice, totalcount 

 def decode_html(html_string):
-    converted = UnicodeDammit(html_string, isHTML=True)
+    converted = UnicodeDammit(html_string)
    if not converted.unicode:
        raise UnicodeDecodeError(
            "Failed to detect encoding, tried [%s]",
--- a/mylar/webserve.py
+++ b/mylar/webserve.py
@ -178,11 +178,11 @@ class WebInterface(object):
                        break
                    cresults.append({
                           'ComicID'   :   stoopie['ComicID'],
-                           'ComicName' :   stoopie['ComicName'],
+                           'ComicName' :   stoopie['ComicName'].decode('utf-8', 'replace'),
                           'ComicYear' :   stoopie['ComicYear'],
                           'ComicIssues' : stoopie['ComicIssues'],
                           'ComicURL' :    stoopie['ComicURL'],
-                           'ComicPublisher' : stoopie['ComicPublisher'],
+                           'ComicPublisher' : stoopie['ComicPublisher'].decode('utf-8', 'replace'),
                           'GCDID' : stoopie['GCDID']
                           })
                    i+=1
@ -671,11 +671,17 @@ class WebInterface(object):

    def readlist(self):
        myDB = db.DBConnection()
-        readlist = myDB.select("SELECT * from readlist order by DateAdded DESC")
-        return serve_template(templatename="readlist.html", title="Readlist", readlist=readlist)
+        readlist = myDB.select("SELECT * from readinglist group by StoryArcID COLLATE NOCASE")  # GROUP BY StoryArcID: one result row per story arc
+        return serve_template(templatename="readinglist.html", title="Readlist", readlist=readlist)  # NOTE(review): the `return page` that follows can never execute
        return page
    readlist.exposed = True

+    def detailReadlist(self,StoryArcID, StoryArcName):
+        """Serve readlist.html with the readinglist rows of one story arc, sorted by ReadingOrder."""
+        arc_rows = db.DBConnection().select("SELECT * from readinglist WHERE StoryArcID=? order by ReadingOrder ASC", [StoryArcID])
+        return serve_template(templatename="readlist.html", title="Detailed Arc list", readlist=arc_rows, storyarcname=StoryArcName)
+    detailReadlist.exposed = True
+
    def addtoreadlist(self, IssueID):
        myDB = db.DBConnection()
        readlist = myDB.action("SELECT * from issues where IssueID=?", [IssueID]).fetchone()
@ -693,6 +699,50 @@ class WebInterface(object):
 
        raise cherrypy.HTTPRedirect("artistPage?ComicID=%s" % readlist['ComicID'])
    addtoreadlist.exposed = True
+
+    def importReadlist(self,filename):
+        """Import a reading-list XML file (path given by filename) into the readinglist table.
+
+        The first <Name> element supplies the story-arc title; each <Book> element is upserted
+        as one row under a shared random StoryArcID, with ReadingOrder counting up from 1.
+        """
+        from xml.dom.minidom import parseString
+        import random
+        myDB = db.DBConnection()
+
+        with open(str(filename)) as readlist_file:  # closed even if read() raises
+            data = readlist_file.read()
+
+        dom = parseString(data)
+        storyarc = dom.getElementsByTagName('Name')[0].firstChild.wholeText
+        tracks = dom.getElementsByTagName('Book')
+        # Parsed values may be unicode; str() on non-ASCII text raises UnicodeEncodeError
+        # under Python 2, so the debug output is encoded to UTF-8 explicitly instead.
+        print ("there are " + str(len(tracks)) + " issues in the story-arc: " + storyarc.encode('utf-8'))
+        #generate a random number for the ID, and tack on the total issue count to the end as a str :)
+        storyarcid = str(random.randint(1000,9999)) + str(len(tracks))
+        for i, book_element in enumerate(tracks, 1):
+            # NOTE(review): random suffixes can repeat within an arc; confirm whether upsert then overwrites a row.
+            st_issueid = str(storyarcid) + "_" + str(random.randint(1000,9999))
+            comicname = book_element.getAttribute('Series')
+            comicnumber = book_element.getAttribute('Number')
+            comicvolume = book_element.getAttribute('Volume')
+            comicyear = book_element.getAttribute('Year')
+            for label, value in (("comic", comicname), ("number", comicnumber), ("volume", comicvolume), ("year", comicyear)):
+                print (label + ": " + value.encode('utf-8'))
+            CtrlVal = {"IssueArcID": st_issueid}
+            NewVals = {"StoryArcID":  storyarcid,
+                       "ComicName":   comicname,
+                       "IssueNumber": comicnumber,
+                       "SeriesYear":  comicvolume,
+                       "IssueYear":   comicyear,
+                       "StoryArc":    storyarc,
+                       "ReadingOrder": i,
+                       "TotalIssues": len(tracks)}
+            myDB.upsert("readinglist", NewVals, CtrlVal)
+
+    importReadlist.exposed = True
+
    
    def logs(self):
        if mylar.LOG_LEVEL is None or mylar.LOG_LEVEL == '':