FIX:(#281) Re-downloading random issues (adding to Wanted list), FIX:(#383) Filechecker should now pick up special characters

evilhero 2013-05-15 05:09:43 -04:00
parent 342b9daa30
commit 424c3ec9a0
4 changed files with 55 additions and 20 deletions


@@ -142,12 +142,12 @@ def IssueDetails(cbdb_id):
     while (i < noresults):
         resultit = tableno[i]  # 7th table, 1st set of tr (which indicates an issue).
-        #print ("resultit: " + str(resultit))
+        print ("resultit: " + str(resultit))
         issuet = resultit.find("a", {"class" : "page_link" })  # gets the issue # portion
         try:
             issue = issuet.findNext(text=True)
         except:
-            #print ("blank space - skipping")
+            print ("blank space - skipping")
             i+=1
             continue
         if 'annual' not in issue.lower():
@@ -161,9 +161,13 @@ def IssueDetails(cbdb_id):
            #since we don't know which one contains the story arc, we need to iterate through to find it
            #we need to know story arc, because the following td is the Publication Date
            n=0
+           issuetitle = 'None'
            while (n < lengtht):
                storyt = lent[n]  #
-               #print ("storyt: " + str(storyt))
+               print ("storyt: " + str(storyt))
+               if 'issue.php' in storyt:
+                   issuetitle = storyt.findNext(text=True)
+                   print ("title:" + issuetitle)
                if 'storyarc.php' in storyt:
                    #print ("found storyarc")
                    storyarc = storyt.findNext(text=True)
@@ -174,15 +178,25 @@ def IssueDetails(cbdb_id):
            publen = len(pubd)  # find the # of <td>'s
            pubs = pubd[publen-1]  # take the last <td> which will always contain the publication date
            pdaters = pubs.findNext(text=True)  # get the actual date :)
            pubdate = re.sub("[^0-9]", "", pdaters)  # digits-only fallback if no month name matches
+           basmonths = {'january':'01','february':'02','march':'03','april':'04','may':'05','june':'06','july':'07','august':'08','september':'09','october':'10','november':'11','december':'12'}
+           for numbs in basmonths:
+               if numbs in pdaters.lower():
+                   pconv = basmonths[numbs]
+                   ParseYear = re.sub('\s', '', pdaters[-5:])
+                   pubdate = str(ParseYear) + "-" + str(pconv)
+                   #logger.fdebug("!success - Publication date: " + str(pubdate))
            print ("Issue : " + str(issue) + " (" + str(pubdate) + ")")
+           print ("Issuetitle " + str(issuetitle))
            annualslist.append({
-               'AnnualIssue': str(issue),
-               'AnnualDate': pubdate
+               'AnnualIssue': str(issue),
+               'AnnualTitle': issuetitle,
+               'AnnualDate': str(pubdate)
                })
            gcount+=1
+           print("annualslist appended...")
        i+=1
    annuals['annualslist'] = annualslist
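The date handling above turns a scraped CBDB publication string such as "May 2013" into a "YYYY-MM" stamp by matching the month name and slicing off the trailing year. A minimal standalone sketch of the same conversion, assuming date strings shaped like the publication column (parse_pubdate is a hypothetical helper, not part of this commit):

import re

basmonths = {'january': '01', 'february': '02', 'march': '03', 'april': '04',
             'may': '05', 'june': '06', 'july': '07', 'august': '08',
             'september': '09', 'october': '10', 'november': '11', 'december': '12'}

def parse_pubdate(pdaters):
    # digits-only fallback when no month name matches
    pubdate = re.sub(r"[^0-9]", "", pdaters)
    for numbs in basmonths:
        if numbs in pdaters.lower():
            ParseYear = re.sub(r'\s', '', pdaters[-5:])  # trailing " 2013" -> "2013"
            pubdate = str(ParseYear) + "-" + basmonths[numbs]
    return pubdate

print (parse_pubdate("May 2013"))  # prints 2013-05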


@@ -41,12 +41,12 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
     comiclist = []
     comiccnt = 0
     not_these = ['\#',
-                 '\,',
+                 ',',
                  '\/',
                  '\:',
                  '\;',
                  '.',
-                 '\-',
+                 '-',
                  '\!',
                  '\$',
                  '\%',
@@ -76,7 +76,6 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
                 #print (subit + " - assuming versioning. Removing from initial search pattern.")
                 subname = re.sub(str(subit), '', subname)
                 volrem = subit
-                #print ("removed " + str(volrem) + " from filename wording")
             if subit.lower()[:3] == 'vol':
                 #if in format vol.2013 etc
                 #because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
@@ -84,7 +83,10 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
                 subname = re.sub(subit, '', subname)
                 volrem = subit
-        subname = re.sub('\_', ' ', subname)
+        #remove the brackets..
+        subname = re.findall('[^()]+', subname)
+        logger.fdebug("subname no brackets: " + str(subname[0]))
+        subname = re.sub('\_', ' ', subname[0])
         nonocount = 0
         for nono in not_these:
             if nono in subname:
@@ -98,7 +100,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
                 subname = re.sub(str(nono), ' ', subname)
                 nonocount = nonocount + subcnt
         #subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
-        modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_watchcomic)
+        modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\'\?\@]', ' ', u_watchcomic)
         detectand = False
         modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
         modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
@@ -117,7 +119,7 @@ def listFiles(dir,watchcomic,AlternateSearch=None):
             altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
         #if '_' in subname:
         #    subname = subname.replace('_', ' ')
-        #logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
+        logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
         if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower():
             if 'annual' in subname.lower():
                 #print ("it's an annual - unsure how to proceed")


@@ -540,3 +540,19 @@ def updateComicLocation():
         #raise cherrypy.HTTPRedirect("config")
     return
+
+def cleanhtml(raw_html):
+    #cleanr = re.compile('<.*?>')
+    #cleantext = re.sub(cleanr, '', raw_html)
+    #return cleantext
+    from bs4 import BeautifulSoup
+    VALID_TAGS = ['div', 'p']
+    soup = BeautifulSoup(raw_html)
+    for tag in soup.findAll('p'):
+        if tag.name not in VALID_TAGS:
+            tag.replaceWith(tag.renderContents())
+    flipflop = soup.renderContents()
+    print flipflop
+    return flipflop
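cleanhtml() keeps a whitelist of tags and renders everything else away. A sketch of the same whitelist idea that walks every tag rather than only <p>, assuming BeautifulSoup 4 (strip_tags is a hypothetical name; unwrap() keeps a tag's children while removing the tag itself):

from bs4 import BeautifulSoup

VALID_TAGS = ['div', 'p']

def strip_tags(raw_html):
    soup = BeautifulSoup(raw_html, 'html.parser')
    for tag in soup.find_all(True):  # True matches every tag in the tree
        if tag.name not in VALID_TAGS:
            tag.unwrap()
    return str(soup)

print (strip_tags('<p>Issue <b>#1</b> of <i>Watchmen</i></p>'))  # prints <p>Issue #1 of Watchmen</p>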


@@ -616,13 +616,15 @@ class WebInterface(object):
         mvupcome = myDB.select("SELECT * from upcoming WHERE IssueDate < date('now') order by IssueDate DESC")
         #get the issue ID's
         for mvup in mvupcome:
-            myissue = myDB.action("SELECT * FROM issues WHERE Issue_Number=?", [mvup['IssueNumber']]).fetchone()
+            myissue = myDB.action("SELECT * FROM issues WHERE IssueID=?", [mvup['IssueID']]).fetchone()
+            #myissue = myDB.action("SELECT * FROM issues WHERE Issue_Number=?", [mvup['IssueNumber']]).fetchone()
             if myissue is None: pass
             else:
                 #print ("ComicName: " + str(myissue['ComicName']))
                 #print ("Issue number : " + str(myissue['Issue_Number']) )
+                logger.fdebug("--Updating Status of issues table because of Upcoming status--")
+                logger.fdebug("ComicName: " + str(myissue['ComicName']))
+                logger.fdebug("Issue number : " + str(myissue['Issue_Number']) )
                 mvcontroldict = {"IssueID": myissue['IssueID']}
                 mvvalues = {"ComicID": myissue['ComicID'],
                             "Status": "Wanted"}
@@ -1211,6 +1213,7 @@ class WebInterface(object):
             soma,noids = librarysync.libraryScan()
         except Exception, e:
             logger.error('Unable to complete the scan: %s' % e)
+            return
         if soma == "Completed":
             print ("sucessfully completed import.")
         else:
@@ -1253,8 +1256,8 @@ class WebInterface(object):
         # unzip -z filename.cbz < /dev/null will remove the comment field, and thus the metadata.
-        self.importResults()
+        #self.importResults()
+        raise cherrypy.HTTPRedirect("importResults")
         if redirect:
             raise cherrypy.HTTPRedirect(redirect)
         else:
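The last hunk swaps a direct call to importResults() for a redirect to the importResults page. A short sketch of that pattern, assuming a CherryPy handler (manualImport is a hypothetical method name): cherrypy.HTTPRedirect is raised rather than returned, so it both stops the handler and sends the browser to the new page.

import cherrypy

class WebInterface(object):
    @cherrypy.expose
    def manualImport(self, redirect=None):
        # ... kick off the import here ...
        raise cherrypy.HTTPRedirect(redirect if redirect else "importResults")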