FIX:(#272) Constant database locking errors should now be resolved, FIX:(#269) Date not populating when no publication date was found (now defaults to the on-sale date), IMP: started the framework for Annuals support

evilhero 2013-03-21 13:09:10 -04:00
parent 5eaf02c817
commit 2775cd496e
7 changed files with 501 additions and 91 deletions


@@ -323,7 +323,33 @@
</tr>
%endfor
</tbody>
</table>
</table>
%if annuals:
<table class="display" id="issue_table">
<thead>
<tr>
<th id="select" align="left"><input type="checkbox" onClick="toggle(this)" class="checkbox" /></th>
<th id="issuenumber">Number</th>
<th id="issuename">Name</th>
<th id="reldate">Date</th>
<th id="status">Status</th>
<th id="options">Options</th>
</tr>
</thead>
<tbody>
%for annual in annuals:
<tr>
<td id="select"><input type="checkbox" name="${annual['IssueID']}" class="checkbox" value="${annual['IssueID']}"/></td>
<td id="issuenumber">${annual['Issue_Number']}</td>
<td id="issuename">${annual['IssueName']}</td>
<td id="reldate">${annual['IssueDate']}</td>
<td id="status">${annual['Status']}</td>
<td id="options"></td>
</tr>
%endfor
</tbody>
</table>
%endif
</div>
</form>
</div>


@@ -191,6 +191,9 @@ COUNT_COMICS = 0
COUNT_ISSUES = 0
COUNT_HAVES = 0
COMICSORT = None
ANNUALS_ON = 0
def CheckSection(sec):
""" Check if INI section exists, if not create it """
try:
@@ -241,9 +244,9 @@ def initialize():
with INIT_LOCK:
global __INITIALIZED__, FULL_PATH, PROG_DIR, VERBOSE, DAEMON, DATA_DIR, CONFIG_FILE, CFG, CONFIG_VERSION, LOG_DIR, CACHE_DIR, LOGVERBOSE, \
global __INITIALIZED__, FULL_PATH, PROG_DIR, VERBOSE, DAEMON, COMICSORT, DATA_DIR, CONFIG_FILE, CFG, CONFIG_VERSION, LOG_DIR, CACHE_DIR, LOGVERBOSE, \
HTTP_PORT, HTTP_HOST, HTTP_USERNAME, HTTP_PASSWORD, HTTP_ROOT, LAUNCH_BROWSER, GIT_PATH, \
CURRENT_VERSION, LATEST_VERSION, CHECK_GITHUB, CHECK_GITHUB_ON_STARTUP, CHECK_GITHUB_INTERVAL, USER_AGENT, MUSIC_DIR, DESTINATION_DIR, \
CURRENT_VERSION, LATEST_VERSION, CHECK_GITHUB, CHECK_GITHUB_ON_STARTUP, CHECK_GITHUB_INTERVAL, USER_AGENT, DESTINATION_DIR, \
DOWNLOAD_DIR, USENET_RETENTION, SEARCH_INTERVAL, NZB_STARTUP_SEARCH, INTERFACE, AUTOWANT_ALL, AUTOWANT_UPCOMING, ZERO_LEVEL, ZERO_LEVEL_N, COMIC_COVER_LOCAL, \
LIBRARYSCAN, LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, USE_SABNZBD, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, SAB_PRIORITY, SAB_DIRECTORY, BLACKHOLE, BLACKHOLE_DIR, ADD_COMICS, COMIC_DIR, IMP_MOVE, IMP_RENAME, IMP_METADATA, \
USE_NZBGET, NZBGET_HOST, NZBGET_PORT, NZBGET_USERNAME, NZBGET_PASSWORD, NZBGET_CATEGORY, NZBGET_PRIORITY, NZBSU, NZBSU_APIKEY, DOGNZB, DOGNZB_APIKEY, NZBX,\
@@ -251,7 +254,7 @@ def initialize():
RAW, RAW_PROVIDER, RAW_USERNAME, RAW_PASSWORD, RAW_GROUPS, EXPERIMENTAL, \
PROWL_ENABLED, PROWL_PRIORITY, PROWL_KEYS, PROWL_ONSNATCH, NMA_ENABLED, NMA_APIKEY, NMA_PRIORITY, NMA_ONSNATCH, \
PREFERRED_QUALITY, MOVE_FILES, RENAME_FILES, LOWERCASE_FILENAMES, USE_MINSIZE, MINSIZE, USE_MAXSIZE, MAXSIZE, CORRECT_METADATA, FOLDER_FORMAT, FILE_FORMAT, REPLACE_CHAR, REPLACE_SPACES, ADD_TO_CSV, CVINFO, LOG_LEVEL, POST_PROCESSING, \
COMIC_LOCATION, QUAL_ALTVERS, QUAL_SCANNER, QUAL_TYPE, QUAL_QUALITY, ENABLE_EXTRA_SCRIPTS, EXTRA_SCRIPTS, ENABLE_PRE_SCRIPTS, PRE_SCRIPTS, PULLNEW, COUNT_ISSUES, COUNT_HAVES, COUNT_COMICS, SYNO_FIX
COMIC_LOCATION, QUAL_ALTVERS, QUAL_SCANNER, QUAL_TYPE, QUAL_QUALITY, ENABLE_EXTRA_SCRIPTS, EXTRA_SCRIPTS, ENABLE_PRE_SCRIPTS, PRE_SCRIPTS, PULLNEW, COUNT_ISSUES, COUNT_HAVES, COUNT_COMICS, SYNO_FIX, ANNUALS_ON
if __INITIALIZED__:
return False
@@ -338,6 +341,10 @@ def initialize():
MAXSIZE = check_setting_str(CFG, 'General', 'maxsize', '')
ADD_TO_CSV = bool(check_setting_int(CFG, 'General', 'add_to_csv', 1))
CVINFO = bool(check_setting_int(CFG, 'General', 'cvinfo', 0))
ANNUALS_ON = bool(check_setting_int(CFG, 'General', 'annuals_on', 0))
if not ANNUALS_ON:
#default to off
ANNUALS_ON = 0
LOG_LEVEL = check_setting_str(CFG, 'General', 'log_level', '')
ENABLE_EXTRA_SCRIPTS = bool(check_setting_int(CFG, 'General', 'enable_extra_scripts', 0))
EXTRA_SCRIPTS = check_setting_str(CFG, 'General', 'extra_scripts', '')
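For reference, the new annuals flag is read with check_setting_int above and written back out in config_write below, so a config.ini entry along these lines would turn the feature on (the value shown is illustrative; it defaults to off):

[General]
annuals_on = 1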
@@ -517,6 +524,9 @@ def initialize():
else:
logger.info("Synology Parsing Fix already implemented. No changes required at this time.")
#Ordering comics here
logger.info("Remapping the sorting to allow for new additions.")
COMICSORT = helpers.ComicSort(sequence='startup')
__INITIALIZED__ = True
return True
@@ -596,6 +606,7 @@ def config_write():
new_config['General']['logverbose'] = int(LOGVERBOSE)
new_config['General']['git_path'] = GIT_PATH
new_config['General']['cache_dir'] = CACHE_DIR
new_config['General']['annuals_on'] = ANNUALS_ON
new_config['General']['check_github'] = int(CHECK_GITHUB)
new_config['General']['check_github_on_startup'] = int(CHECK_GITHUB_ON_STARTUP)
@@ -730,10 +741,6 @@ def start():
SCHED.add_interval_job(search.searchforissue, minutes=SEARCH_INTERVAL)
#SCHED.add_interval_job(librarysync.libraryScan, minutes=LIBRARYSCAN_INTERVAL)
#Ordering comics here
logger.info("Remapping the sorting to allow for new additions.")
helpers.ComicSort()
#weekly pull list gets messed up if it's not populated first, so let's populate it then set the scheduler.
logger.info("Checking for existance of Weekly Comic listing...")
PULLNEW = 'no' #reset the indicator here.
@@ -971,7 +978,7 @@ def shutdown(restart=False, update=False):
SCHED.shutdown(wait=False)
config_write()
if not restart and not update:
logger.info('Mylar is shutting down...')
if update:

mylar/comicbookdb.py (new executable file, 198 additions)

@@ -0,0 +1,198 @@
from bs4 import BeautifulSoup, UnicodeDammit
import urllib2
import re
import helpers
import logger
import datetime
import sys
from decimal import Decimal
from HTMLParser import HTMLParseError
from time import strptime
def cbdb(comicnm, ComicYear):
#comicnm = 'Animal Man'
#print ( "comicname: " + str(comicnm) )
#print ( "comicyear: " + str(comicyr) )
comicnm = re.sub(' ', '+', comicnm)
input = "http://mobile.comicbookdb.com/search.php?form_search=" + str(comicnm) + "&form_searchtype=Title&x=0&y=0"
response = urllib2.urlopen ( input )
soup = BeautifulSoup ( response)
abc = soup.findAll('a', href=True)
lenabc = len(abc)
i=0
resultName = []
resultID = []
resultYear = []
resultIssues = []
resultURL = []
matched = "no"
while (i < lenabc):
titlet = abc[i] #iterate through the href's, pulling out only results.
print ("titlet: " + str(titlet))
if "title.php" in str(titlet):
print ("found title")
tempName = titlet.findNext(text=True)
print ("tempName: " + tempName)
resultName = tempName[:tempName.find("(")]
print ("ComicName: " + resultName)
resultYear = tempName[tempName.find("(")+1:tempName.find(")")]
if resultYear.isdigit(): pass
else:
i+=1
continue
print "ComicYear: " + resultYear
ID_som = titlet['href']
resultURL = ID_som
print "CBDB URL: " + resultURL
IDst = ID_som.find('?ID=')
resultID = ID_som[(IDst+4):]
print "CBDB ID: " + resultID
print ("resultname: " + resultName)
CleanComicName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', comicnm)
CleanComicName = re.sub(' ', '', CleanComicName).lower()
CleanResultName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', resultName)
CleanResultName = re.sub(' ', '', CleanResultName).lower()
print ("CleanComicName: " + CleanComicName)
print ("CleanResultName: " + CleanResultName)
if CleanResultName == CleanComicName or CleanResultName[3:] == CleanComicName or len(CleanComicName) == len(CleanResultName):
#if resultName[n].lower() == helpers.cleanName(str(ComicName)).lower():
print ("i:" + str(i) + "...matched by name to Mylar!")
print ("ComicYear: " + str(ComicYear) + ".. to ResultYear: " + str(resultYear))
if resultYear.isdigit():
if int(resultYear) == int(ComicYear) or int(resultYear) == int(ComicYear)+1:
resultID = str(resultID)
print ("Matchumundo!")
matched = "yes"
else:
i+=1
continue
if matched == "yes":
break
i+=1
return IssueDetails(resultID)
def IssueDetails(cbdb_id):
annuals = {}
annualslist = []
gcount = 0
pagethis = 'http://comicbookdb.com/title.php?ID=' + str(cbdb_id)
response = urllib2.urlopen(pagethis)
soup = BeautifulSoup(response)
resultp = soup.findAll("table")
total = len(resultp) # -- number of tables
#get details here
startit = resultp[0].find("table", {"width" : "884" })
i = 0
pubchk = 0
boop = startit.findAll('strong')
for t in boop:
if pubchk == 0:
if ("publisher.php?" in startit('a')[i]['href']):
print (startit('a')[i]['href'])
publisher = str(startit('a')[i].contents)
print ("publisher: " + publisher)
pubchk = "1"
elif 'Publication Date: ' in t:
pdi = boop[i].nextSibling
print ("publication date: " + pdi)
elif 'Number of issues cataloged: ' in t:
noi = boop[i].nextSibling
print ("number of issues: " + noi)
i+=1
if i > len(boop): break
# pd = startit.find("Publication Date: ").nextSibling.next.text
# resultPublished = str(pd)
# noi = startit.find("Number of issues cataloged: ").nextSibling.next.text
# totalIssues = str(noi)
# print ("Publication Dates : " + str(resultPublished))
# print ("Total Issues: " + str(totalIssues))
ti = 1 # start at one as 0 is the ENTIRE soup structure
while (ti < total):
#print result
if resultp[ti].find("a", {"class" : "page_link" }):
#print "matcheroso"
tableno = resultp[ti].findAll('tr') #7th table, all the tr's
#print ti, total
break
ti+=1
noresults = len(tableno)
#print ("tableno: " + str(tableno))
print ("there are " + str(noresults) + " issues total (cover variations, et all).")
i=1 # start at 1 so we don't grab the table headers ;)
issue = []
storyarc = []
pubdate = []
#resultit = tableno[1]
#print ("resultit: " + str(resultit))
while (i < noresults):
resultit = tableno[i] # 7th table, 1st set of tr (which indicates an issue).
#print ("resultit: " + str(resultit))
issuet = resultit.find("a", {"class" : "page_link" }) # gets the issue # portion
try:
issue = issuet.findNext(text=True)
except:
#print ("blank space - skipping")
i+=1
continue
if 'annual' not in issue.lower():
i+=1
continue
lent = resultit('a',href=True) #gathers all the a href's within this particular tr
#print ("lent: " + str(lent))
lengtht = len(lent) #returns the # of ahref's within this particular tr
#print ("lengtht: " + str(lengtht))
#since we don't know which one contains the story arc, we need to iterate through to find it
#we need to know story arc, because the following td is the Publication Date
n=0
while (n < lengtht):
storyt = lent[n] #
#print ("storyt: " + str(storyt))
if 'storyarc.php' in str(storyt):
#print ("found storyarc")
storyarc = storyt.findNext(text=True)
#print ("Story Arc: " + str(storyarc))
break
n+=1
pubd = resultit('td') # find all the <td>'s within this tr
publen = len(pubd) # find the # of <td>'s
pubs = pubd[publen-1] #take the last <td> which will always contain the publication date
pdaters = pubs.findNext(text=True) #get the actual date :)
pubdate = re.sub("[^0-9]", "", pdaters)
print ("Issue : " + str(issue) + " (" + str(pubdate) + ")")
annualslist.append({
'AnnualIssue': str(issue),
'AnnualDate': pubdate
})
gcount+=1
i+=1
annuals['annualslist'] = annualslist
print ("Issues:" + str(annuals['annualslist']))
print ("There are " + str(gcount) + " issues.")
annuals['totalissues'] = gcount
annuals['GCDComicID'] = cbdb_id
return annuals
if __name__ == '__main__':
cbdb(sys.argv[1], sys.argv[2])
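A rough sketch of how the new module could be exercised on its own, based on the dict it returns ('annualslist', 'totalissues', 'GCDComicID'); the series name and year here are placeholders:

from mylar import comicbookdb

annuals = comicbookdb.cbdb('Animal Man', 1988)
print ("total annuals found: " + str(annuals['totalissues']))
for ann in annuals['annualslist']:
    print (ann['AnnualIssue'] + " (" + ann['AnnualDate'] + ")")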


@@ -381,27 +381,78 @@ def apiremove(apistring, type):
return apiremoved
def ComicSort(imported=None):
from mylar import db, logger
myDB = db.DBConnection()
if imported != None:
#def ComicSort(imported=None):
#from mylar import db, logger
#myDB = db.DBConnection()
#if imported != None:
#if it's an Add Series, set it to the last record for now so it doesn't throw a 500.
cid = {"ComicID": imported}
val = {"SortOrder": 999}
myDB.upsert("comics", val, cid)
logger.info("New Series...Set SortOrder to last record to avoid errors for now.")
else:
i = 1
#cid = {"ComicID": imported}
#val = {"SortOrder": 999}
# comicorder.append({
# 'ComicID': imported,
# 'SortOrder': 999
# })
# issue['issuechoice'] = issuechoice
# myDB.upsert("comics", val, cid)
# logger.info("New Series...Set SortOrder to last record to avoid errors for now.")
#else:
def ComicSort(comicorder=None,sequence=None,imported=None):
import mylar #the COMICSORT cache is referenced in both branches below
if sequence:
# if it's on startup, load the sql into a tuple for use to avoid record-locking
i = 0
import db, logger
myDB = db.DBConnection()
comicsort = myDB.action("SELECT * FROM comics ORDER BY ComicSortName COLLATE NOCASE")
comicorderlist = []
comicorder = {}
comicidlist = []
if sequence == 'update':
mylar.COMICSORT['SortOrder'] = [] #reset the cached order so the update loop below can re-append
mylar.COMICSORT['LastOrderNo'] = None
mylar.COMICSORT['LastOrderID'] = None
for csort in comicsort:
if csort['ComicID'] is None: continue
if not csort['ComicID'] in comicidlist:
cid = {"ComicID": csort['ComicID']}
val = {"SortOrder": i}
myDB.upsert("comics", val, cid)
if sequence == 'startup':
comicorderlist.append({
'ComicID': csort['ComicID'],
'ComicOrder': i
})
elif sequence == 'update':
mylar.COMICSORT['SortOrder'].append({
'ComicID': csort['ComicID'],
'ComicOrder': i
})
comicidlist.append(csort['ComicID'])
i+=1
logger.info("Sucessfully ordered " + str(i-1) + " series in your watchlist.")
return
if sequence == 'startup':
comicorder['SortOrder'] = comicorderlist
comicorder['LastOrderNo'] = i-1
comicorder['LastOrderID'] = comicorder['SortOrder'][i-1]['ComicID']
logger.info("Sucessfully ordered " + str(i-1) + " series in your watchlist.")
return comicorder
elif sequence == 'update':
mylar.COMICSORT['LastOrderNo'] = i-1
mylar.COMICSORT['LastOrderID'] = mylar.COMICSORT['SortOrder'][i-1]['ComicID']
return
else:
# for new series adds, we already know the comicid, so we set the sortorder to an abnormally high #
# we DO NOT write to the db to avoid record-locking.
# if we get 2 999's we're in trouble though.
sortedapp = []
if comicorder['LastOrderNo'] >= 999:
lastorderval = int(comicorder['LastOrderNo']) + 1
else:
lastorderval = 999
sortedapp.append({
'ComicID': imported,
'ComicOrder': lastorderval
})
mylar.COMICSORT['SortOrder'] = sortedapp
mylar.COMICSORT['LastOrderNo'] = lastorderval
mylar.COMICSORT['LastOrderID'] = imported
return
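Taken together, the reworked ComicSort is now called three ways in this commit: at startup to build the in-memory cache, after a series' details are written to refresh it, and on a new add to slot the ComicID in without touching the database. Condensed from the call sites elsewhere in this diff:

#at startup (mylar/__init__.py): build the cache once, avoiding write locks
COMICSORT = helpers.ComicSort(sequence='startup')
#after the ComicName is written (importer.py): rebuild from the db
helpers.ComicSort(sequence='update')
#on an Add Series (importer.py): in-memory only, no db write
helpers.ComicSort(comicorder=mylar.COMICSORT, imported=comicid)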


@@ -25,7 +25,7 @@ import sqlite3
import cherrypy
import mylar
from mylar import logger, helpers, db, mb, albumart, cv, parseit, filechecker, search, updater, moveit
from mylar import logger, helpers, db, mb, albumart, cv, parseit, filechecker, search, updater, moveit, comicbookdb
def is_exists(comicid):
@@ -65,7 +65,8 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
myDB.upsert("comics", newValueDict, controlValueDict)
#run the re-sortorder here in order to properly display the page
helpers.ComicSort(comicid)
if pullupd is None:
helpers.ComicSort(comicorder=mylar.COMICSORT, imported=comicid)
# we need to lookup the info for the requested ComicID in full now
comic = cv.getComic(comicid,'comic')
@@ -124,6 +125,25 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
else:
SeriesYear = comic['ComicYear']
#let's do the Annual check here.
if mylar.ANNUALS_ON:
annuals = comicbookdb.cbdb(comic['ComicName'], SeriesYear)
print ("Number of Annuals returned: " + str(annuals['totalissues']))
nb = 0
while (nb <= int(annuals['totalissues'])):
try:
annualval = annuals['annualslist'][nb]
except IndexError:
break
newCtrl = {"IssueID": str(annualval['AnnualIssue'] + annualval['AnnualDate'])}
newVals = {"Issue_Number": annualval['AnnualIssue'],
"IssueDate": annualval['AnnualDate'],
"ComicID": comicid,
"Status": "skipped"}
myDB.upsert("annuals", newVals, newCtrl)
nb+=1
#parseit.annualCheck(gcomicid=gcdinfo['GCDComicID'], comicid=comicid, comicname=comic['ComicName'], comicyear=SeriesYear)
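Since the locking fix is largely about limiting when writes happen, it may help to recall the update-or-insert pattern a call like myDB.upsert("annuals", newVals, newCtrl) typically follows; a hedged sketch against a plain sqlite3 connection, not the actual db.py code:

def upsert_sketch(conn, table, valueDict, keyDict):
    #try an UPDATE keyed on the control dict first
    sets = ", ".join(k + "=?" for k in valueDict)
    wheres = " AND ".join(k + "=?" for k in keyDict)
    cur = conn.execute("UPDATE " + table + " SET " + sets + " WHERE " + wheres,
                       valueDict.values() + keyDict.values())
    if cur.rowcount == 0:
        #no existing row matched the key, so INSERT a fresh one
        cols = valueDict.keys() + keyDict.keys()
        conn.execute("INSERT INTO " + table + " (" + ", ".join(cols) + ") VALUES (" + ", ".join("?" * len(cols)) + ")",
                     valueDict.values() + keyDict.values())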
#comic book location on machine
# setup default location here
@@ -172,16 +192,18 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
if mylar.REPLACE_SPACES:
#mylar.REPLACE_CHAR ...determines what to replace spaces with underscore or dot
comlocation = comlocation.replace(' ', mylar.REPLACE_CHAR)
#if it doesn't exist - create it (otherwise will bugger up later on)
if os.path.isdir(str(comlocation)):
logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
else:
#print ("Directory doesn't exist!")
try:
os.makedirs(str(comlocation))
logger.info(u"Directory successfully created at: " + str(comlocation))
except OSError:
logger.error(u"Could not create comicdir : " + str(comlocation))
#moved this out of the above loop so it will check for the existence of comlocation in case it was moved
#if it doesn't exist - create it (otherwise will bugger up later on)
if os.path.isdir(str(comlocation)):
logger.info(u"Directory (" + str(comlocation) + ") already exists! Continuing...")
else:
#print ("Directory doesn't exist!")
try:
os.makedirs(str(comlocation))
logger.info(u"Directory successfully created at: " + str(comlocation))
except OSError:
logger.error(u"Could not create comicdir : " + str(comlocation))
#try to account for CV not updating new issues as fast as GCD
#seems CV doesn't update total counts
@@ -202,13 +224,18 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
except OSError:
logger.error('Could not create cache dir. Check permissions of cache dir: ' + str(mylar.CACHE_DIR))
coverfile = mylar.CACHE_DIR + "/" + str(comicid) + ".jpg"
coverfile = os.path.join(mylar.CACHE_DIR, str(comicid) + ".jpg")
#try:
urllib.urlretrieve(str(comic['ComicImage']), str(coverfile))
try:
with open(str(coverfile)) as f:
ComicImage = os.path.join('cache',str(comicid) + ".jpg")
#this is for Firefox when outside the LAN...it works, but I don't know how to implement it
#without breaking the normal flow for inside the LAN (above)
#ComicImage = "http://" + str(mylar.HTTP_HOST) + ":" + str(mylar.HTTP_PORT) + "/cache/" + str(comicid) + ".jpg"
logger.info(u"Sucessfully retrieved cover for " + comic['ComicName'])
#if the comic cover local is checked, save a cover.jpg to the series folder.
if mylar.COMIC_COVER_LOCAL:
@@ -237,9 +264,10 @@ def addComictoDB(comicid,mismatch=None,pullupd=None,imported=None,ogcname=None):
myDB.upsert("comics", newValueDict, controlValueDict)
#re-run the re-sortorder here now that the ComicName has been written to the db.
helpers.ComicSort()
#comicsort here...
#run the re-sortorder here in order to properly display the page
if pullupd is None:
helpers.ComicSort(sequence='update')
issued = cv.getComic(comicid,'issue')
logger.info(u"Sucessfully retrieved issue details for " + comic['ComicName'] )


@@ -387,7 +387,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariation):
PrevYRMO = ParseDate
gcdinfo['ComicDate'] = ParseDate
#^^ will retrieve date #
#logger.fdebug("adding: " + str(gcdinfo['ComicIssue']))
#logger.fdebug("adding: " + str(gcdinfo['ComicIssue']) + " - date: " + str(ParseDate))
if ComicID[:1] == "G":
gcdchoice.append({
'GCDid': ComicID,
@@ -430,6 +430,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariation):
gcdinfo['ComicImage'] = gcdcover
gcdinfo['resultPublished'] = resultPublished
gcdinfo['SeriesYear'] = ParseYear
gcdinfo['GCDComicID'] = resultURL.split('/')[0]
return gcdinfo
## -- end (GCD) -- ##
@@ -438,31 +439,39 @@ def GettheDate(parsed,PrevYRMO):
#try publication date first
#logger.fdebug("parsed:" + str(parsed))
subtxt1 = parsed('td')[1]
ParseDate = subtxt1.findNext(text=True)
ParseDate = subtxt1.findNext(text=True).rstrip()
pformat = 'pub'
if ParseDate is None or ParseDate == '':
subtxt1 = parsed('td')[2]
ParseDate = subtxt1.findNext(text=True)
pformat = 'on-sale'
if len(ParseDate) < 7: ParseDate = '0000-00' #invalid on-sale date format, drop to 0000-00 to avoid errors
basmonths = {'january':'01','february':'02','march':'03','april':'04','may':'05','june':'06','july':'07','august':'08','september':'09','october':'10','november':'11','december':'12'}
pdlen = len(ParseDate)
pdfind = ParseDate.find(' ',2)
#logger.fdebug("length: " + str(pdlen) + "....first space @ pos " + str(pdfind))
#logger.fdebug("this should be the year: " + str(ParseDate[pdfind+1:pdlen-1]))
if ParseDate[pdfind+1:pdlen-1].isdigit():
#assume valid date.
#search for number as text, and change to numeric
for numbs in basmonths:
if numbs in ParseDate.lower():
pconv = basmonths[numbs]
ParseYear = re.sub('\s','',ParseDate[-5:])
ParseDate = str(ParseYear) + "-" + str(pconv)
#logger.fdebug("!success - Publication date: " + str(ParseDate))
break
# some comics have messed-up pub. dates and use Spring/Summer/Fall/Winter instead of a month
if pformat == 'on-sale': pass # date is in correct format...
else:
baseseasons = {'spring':'03','summer':'06','fall':'09','winter':'12'}
for seas in baseseasons:
if seas in ParseDate.lower():
sconv = baseseasons[seas]
ParseYear = re.sub('\s','',ParseDate[-5:])
ParseDate = str(ParseYear) + "-" + str(sconv)
break
if ParseDate[pdfind+1:pdlen-1].isdigit():
#assume valid date.
#search for number as text, and change to numeric
for numbs in basmonths:
if numbs in ParseDate.lower():
pconv = basmonths[numbs]
ParseYear = re.sub('\s','',ParseDate[-5:])
ParseDate = str(ParseYear) + "-" + str(pconv)
#logger.fdebug("!success - Publication date: " + str(ParseDate))
break
# some comics have messed-up pub. dates and use Spring/Summer/Fall/Winter instead of a month
else:
baseseasons = {'spring':'03','summer':'06','fall':'09','winter':'12'}
for seas in baseseasons:
if seas in ParseDate.lower():
sconv = baseseasons[seas]
ParseYear = re.sub('\s','',ParseDate[-5:])
ParseDate = str(ParseYear) + "-" + str(sconv)
break
# #try key date
# subtxt1 = parsed('td')[2]
# ParseDate = subtxt1.findNext(text=True)
@@ -472,22 +481,22 @@ def GettheDate(parsed,PrevYRMO):
# subtxt3 = parsed('td')[0]
# ParseDate = subtxt3.findNext(text=True)
# if ParseDate == ' ':
#increment previous month by one and throw it in until it's populated properly.
if PrevYRMO == '0000-00':
ParseDate = '0000-00'
else:
PrevYR = str(PrevYRMO)[:4]
PrevMO = str(PrevYRMO)[5:]
#let's increment the month now (if it's 12th month, up the year and hit Jan.)
if int(PrevMO) == 12:
PrevYR = int(PrevYR) + 1
PrevMO = 1
#increment previous month by one and throw it in until it's populated properly.
if PrevYRMO == '0000-00':
ParseDate = '0000-00'
else:
PrevMO = int(PrevMO) + 1
if int(PrevMO) < 10:
PrevMO = "0" + str(PrevMO)
ParseDate = str(PrevYR) + "-" + str(PrevMO)
#logger.fdebug("parseDAte:" + str(ParseDate))
PrevYR = str(PrevYRMO)[:4]
PrevMO = str(PrevYRMO)[5:]
#let's increment the month now (if it's 12th month, up the year and hit Jan.)
if int(PrevMO) == 12:
PrevYR = int(PrevYR) + 1
PrevMO = 1
else:
PrevMO = int(PrevMO) + 1
if int(PrevMO) < 10:
PrevMO = "0" + str(PrevMO)
ParseDate = str(PrevYR) + "-" + str(PrevMO)
#logger.fdebug("parseDAte:" + str(ParseDate))
return ParseDate
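For clarity, the month-name and season handling above boils down to a single mapping step; a minimal standalone sketch (the helper name is illustrative):

def pubdate_to_yyyymm(ParseDate):
    #e.g. 'December 2012' -> '2012-12', 'Winter 2012' -> '2012-12'
    basmonths = {'january':'01','february':'02','march':'03','april':'04','may':'05','june':'06','july':'07','august':'08','september':'09','october':'10','november':'11','december':'12'}
    baseseasons = {'spring':'03','summer':'06','fall':'09','winter':'12'}
    year = ParseDate[-5:].strip()
    for name, num in dict(basmonths, **baseseasons).items():
        if name in ParseDate.lower():
            return year + "-" + num
    return '0000-00' #nothing recognizable; the caller falls back to incrementing the previous month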
def GCDAdd(gcdcomicid):
@@ -711,3 +720,84 @@ def decode_html(html_string):
', '.join(converted.triedEncodings))
# print converted.originalEncoding
return converted.unicode
def annualCheck(gcomicid, comicid, comicname, comicyear):
# will only work if we already matched for gcd.
# search for <comicname> annual
# grab annual listing that hits on comicyear (seriesyear)
# grab results :)
print ("GcomicID: " + str(gcomicid))
print ("comicID: " + str(comicid))
print ("comicname: " + comicname)
print ("comicyear: " + str(comicyear))
comicnm = comicname.encode('utf-8').strip()
comicnm_1 = re.sub('\+', '%2B', comicnm + " annual")
comicnm = re.sub(' ', '+', comicnm_1)
input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&order2=date&order3=&start_date=' + str(comicyear) + '-01-01&end_date=' + str(comicyear) + '-12-31&series=' + str(comicnm) + '&is_indexed=None'
response = urllib2.urlopen ( input )
soup = BeautifulSoup ( response)
cnt1 = len(soup.findAll("tr", {"class" : "listing_even"}))
cnt2 = len(soup.findAll("tr", {"class" : "listing_odd"}))
cnt = int(cnt1 + cnt2)
print (str(cnt) + " results")
resultName = []
resultID = []
resultYear = []
resultIssues = []
resultURL = None
n_odd = -1
n_even = -1
n = 0
while ( n < cnt ):
if n%2==0:
n_even+=1
resultp = soup.findAll("tr", {"class" : "listing_even"})[n_even]
else:
n_odd+=1
resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd]
rtp = resultp('a')[1]
rtp1 = rtp.findNext(text=True)
resultName.append(helpers.cleanName(re.sub('Annual', '', rtp1)))
print ( "Comic Name: " + str(resultName[n]) )
fip = resultp('a',href=True)[1]
resultID.append(fip['href'])
print ( "ID: " + str(resultID[n]) )
subtxt3 = resultp('td')[3]
resultYear.append(subtxt3.findNext(text=True))
resultYear[n] = resultYear[n].replace(' ','')
subtxt4 = resultp('td')[4]
resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
resiss = resultIssues[n].find('issue')
resiss = int(resiss)
resultIssues[n] = resultIssues[n][:resiss]
resultIssues[n] = resultIssues[n].replace(' ','')
print ( "Year: " + str(resultYear[n]) )
print ( "Issues: " + str(resultIssues[n]) )
CleanComicName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', comicnm)
CleanComicName = re.sub(' ', '', CleanComicName).lower()
CleanResultName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', resultName[n])
CleanResultName = re.sub(' ', '', CleanResultName).lower()
print ("CleanComicName: " + str(CleanComicName))
print ("CleanResultName: " + str(CleanResultName))
if CleanResultName == CleanComicName or CleanResultName[3:] == CleanComicName:
#if resultName[n].lower() == helpers.cleanName(str(ComicName)).lower():
#print ("n:" + str(n) + "...matched by name to Mylar!")
if resultYear[n] == str(comicyear) or resultYear[n] == str(int(comicyear)+1):
print ("n:" + str(n) + "...matched by year to Mylar!")
print ( "Year: " + str(resultYear[n]) )
TotalIssues = resultIssues[n]
resultURL = str(resultID[n])
rptxt = resultp('td')[6]
resultPublished = rptxt.findNext(text=True)
#print ("Series Published: " + str(resultPublished))
break
n+=1
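#framework only at this point (per the commit message): resultURL, TotalIssues and resultPublished are gathered but not yet passed back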
return


@@ -71,27 +71,33 @@ class WebInterface(object):
if comic is None:
raise cherrypy.HTTPRedirect("home")
#let's cheat. :)
comicskip = myDB.select('SELECT * from comics order by ComicSortName COLLATE NOCASE')
#comicskip = myDB.select('SELECT * from comics order by ComicSortName COLLATE NOCASE')
skipno = len(mylar.COMICSORT['SortOrder'])
lastno = mylar.COMICSORT['LastOrderNo']
lastid = mylar.COMICSORT['LastOrderID']
series = {}
for cskip in comicskip:
i = 0
while (i < skipno):
cskip = mylar.COMICSORT['SortOrder'][i]
if cskip['ComicID'] == ComicID:
cursortnum = cskip['SortOrder']
cursortnum = cskip['ComicOrder']
series['Current'] = cskip['ComicID']
if cursortnum == 1 or cursortnum == 999:
if cursortnum == 0:
# if first record, set the Previous record to the LAST record.
previous = myDB.action("SELECT ComicID from Comics order by SortOrder DESC LIMIT 1").fetchone()
previous = lastid
else:
previous = myDB.action("SELECT ComicID from Comics WHERE SortOrder=?", [cursortnum-1]).fetchone()
previous = mylar.COMICSORT['SortOrder'][i-1]['ComicID']
next = myDB.action("SELECT ComicID from Comics WHERE SortOrder=?", [cursortnum+1]).fetchone()
if next is None:
# if last record, set the Next record to the FIRST record.
next = myDB.action("SELECT ComicID from Comics order by ComicSortName").fetchone()
series['Previous'] = previous[0]
series['Next'] = next[0]
# if last record, set the Next record to the FIRST record.
if cursortnum == lastno:
next = mylar.COMICSORT['SortOrder'][0]['ComicID']
else:
next = mylar.COMICSORT['SortOrder'][i+1]['ComicID']
series['Previous'] = previous
series['Next'] = next
break
i+=1
issues = myDB.select('SELECT * FROM issues WHERE ComicID=? order by Int_IssueNumber DESC', [ComicID])
isCounts = {}
isCounts[1] = 0 #1 skipped
@@ -123,7 +129,11 @@ class WebInterface(object):
"fuzzy_year2" : helpers.radio(int(usethefuzzy), 2),
"skipped2wanted" : helpers.checked(skipped2wanted)
}
return serve_template(templatename="artistredone.html", title=comic['ComicName'], comic=comic, issues=issues, comicConfig=comicConfig, isCounts=isCounts, series=series)
if mylar.ANNUALS_ON:
annuals = myDB.select("SELECT * FROM annuals WHERE ComicID=?", [ComicID])
else: annuals = None
return serve_template(templatename="artistredone.html", title=comic['ComicName'], comic=comic, issues=issues, comicConfig=comicConfig, isCounts=isCounts, series=series, annuals=annuals)
artistPage.exposed = True
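The Previous/Next handling above now wraps around the cached sort order instead of querying the comics table each time; a minimal sketch of that circular lookup (the function name is illustrative, the list shape is what helpers.ComicSort builds):

def neighbours(sortorder, comicid):
    ids = [c['ComicID'] for c in sortorder]
    i = ids.index(comicid)
    previous = ids[i-1]              #index -1 naturally wraps to the last entry
    nextid = ids[(i+1) % len(ids)]   #modulo wraps past the end back to the first
    return previous, nextid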
def searchit(self, name, issue=None, mode=None, type=None):