# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.

import sys
import os
import re
import time
import logger
import string
import urllib2
import lib.feedparser
import mylar
import platform
from bs4 import BeautifulSoup as Soup
import httplib

def patch_http_response_read(func):
    #wrap httplib.HTTPResponse.read so a truncated CV response returns the
    #partial payload instead of raising httplib.IncompleteRead.
    def inner(*args):
        try:
            return func(*args)
        except httplib.IncompleteRead, e:
            return e.partial

    return inner

httplib.HTTPResponse.read = patch_http_response_read(httplib.HTTPResponse.read)

#force HTTP/1.0 on python 2.7.6 - avoids chunked transfers, which were seen
#to trigger IncompleteRead errors on that specific version.
if platform.python_version() == '2.7.6':
    httplib.HTTPConnection._http_vsn = 10
    httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
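
#with the patch in place, a response that dies mid-transfer hands back whatever
#bytes arrived rather than raising, e.g. (illustrative):
#   data = urllib2.urlopen(some_cv_url).read()    #partial data, not IncompleteRead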

def pulldetails(comicid, type, issueid=None, offset=1, arclist=None, comicidlist=None):
    #import the easy-to-use xml parser called minidom:
    from xml.dom.minidom import parseString

    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - a lot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    if type == 'comic':
        if not comicid.startswith('4050-'): comicid = '4050-' + comicid
        PULLURL = mylar.CVURL + 'volume/' + str(comicid) + '/?api_key=' + str(comicapi) + '&format=xml&field_list=name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,first_issue,deck,aliases'
    elif type == 'issue':
        if mylar.CV_ONLY:
            cv_type = 'issues'
            if arclist is None:
                searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date'
            else:
                searchset = 'filter=id:' + (arclist) + '&field_list=cover_date,id,issue_number,name,date_last_updated,store_date,volume'
        else:
            cv_type = 'volume/' + str(comicid)
            searchset = 'name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,store_date'
        PULLURL = mylar.CVURL + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset) + '&offset=' + str(offset)
    elif type == 'firstissue':
        #this is used ONLY for CV_ONLY
        PULLURL = mylar.CVURL + 'issues/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(issueid) + '&field_list=cover_date'
    elif type == 'storyarc':
        PULLURL = mylar.CVURL + 'story_arcs/?api_key=' + str(comicapi) + '&format=xml&filter=name:' + str(issueid) + '&field_list=cover_date'
    elif type == 'comicyears':
        PULLURL = mylar.CVURL + 'volumes/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(comicidlist) + '&field_list=name,id,start_year,publisher&offset=' + str(offset)
    elif type == 'import':
        PULLURL = mylar.CVURL + 'issues/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + (comicidlist) + '&field_list=cover_date,id,issue_number,name,date_last_updated,store_date,volume' + '&offset=' + str(offset)

    #logger.info('CV.PULLURL: ' + PULLURL)
    #new CV API restriction - one api request / second, so sleep between hits.
    if mylar.CVAPI_RATE is None or mylar.CVAPI_RATE < 2:
        time.sleep(2)
    else:
        time.sleep(mylar.CVAPI_RATE)

    #download the file:
    file = urllib2.urlopen(PULLURL)
    #convert to string:
    data = file.read()
    #close file because we don't need it anymore:
    file.close()
    #parse the xml you downloaded
    dom = parseString(data)

    return dom
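
#usage sketch (hypothetical ids; assumes Mylar's config - CVURL, API key,
#CVAPI_RATE - has been initialized):
#   dom = pulldetails('4050-18166', 'comic')         #volume details
#   dom = pulldetails('18166', 'issue', offset=0)    #issues within a volume
#   year = dom.getElementsByTagName('start_year')[0].firstChild.wholeText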

def getComic(comicid, type, issueid=None, arc=None, arcid=None, arclist=None, comicidlist=None):
    if type == 'issue':
        offset = 1
        issue = {}
        ndic = []
        issuechoice = []
        comicResults = []
        firstdate = '2099-00-00'
        #let's find out how many results we get from the query...
        if comicid is None:
            #if comicid is None, it's coming from the story arc search results.
            id = arcid
            #since the arclist holds the issueids and the pertinent reading order, strip out the reading order so this works.
            aclist = ''
            for ac in arclist.split('|'):
                aclist += ac[:ac.find(',')] + '|'
            if aclist.endswith('|'):
                aclist = aclist[:-1]
            islist = aclist
        else:
            id = comicid
            islist = None
        searched = pulldetails(id, 'issue', None, 0, islist)
        if searched is None:
            return False
        totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
        logger.fdebug("there are " + str(totalResults) + " search results...")
        if not totalResults:
            return False
        countResults = 0
        while (countResults < int(totalResults)):
            logger.fdebug("querying range from " + str(countResults) + " to " + str(countResults + 100))
            if countResults > 0:
                #new api - have to change to page # instead of offset count
                offsetcount = countResults
                searched = pulldetails(id, 'issue', None, offsetcount, islist)
            issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
            if tmpdate < firstdate:
                firstdate = tmpdate
            ndic = ndic + issuechoice
            #search results are limited to 100 per page...account for the pagination.
            countResults = countResults + 100

        issue['issuechoice'] = ndic
        issue['firstdate'] = firstdate
        return issue

    elif type == 'comic':
        dom = pulldetails(comicid, 'comic', None, 1)
        return GetComicInfo(comicid, dom)
    elif type == 'firstissue':
        dom = pulldetails(comicid, 'firstissue', issueid, 1)
        return GetFirstIssue(issueid, dom)
    elif type == 'storyarc':
        dom = pulldetails(arc, 'storyarc', None, 1)
        return GetComicInfo(issueid, dom)
    elif type == 'comicyears':
        #used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year.
        #this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
        #set the offset to 0, since we're doing a filter.
        dom = pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist)
        return GetSeriesYears(dom)
    elif type == 'import':
        #used by the importer when doing a scan with metatagging enabled. If metatagging comes back true, then there's an IssueID present
        #within the tagging (with CT). This compiles all of the IssueIDs during a scan (in batches of 100), and returns the corresponding CV data
        #related to the given IssueIDs - namely ComicID, Name, Volume (more at some point, but those are the important ones).
        offset = 1
        if len(comicidlist) <= 100:
            endcnt = len(comicidlist)
        else:
            endcnt = 100

        id_count = 0
        import_list = []
        logger.fdebug('comicidlist:' + str(comicidlist))

        while id_count < len(comicidlist):
            #break it up by 100 per api hit
            #do the first 100 regardless
            in_cnt = 0
            for i in range(id_count, endcnt):
                if in_cnt == 0:
                    tmpidlist = str(comicidlist[i])
                else:
                    tmpidlist += '|' + str(comicidlist[i])
                in_cnt += 1
            logger.fdebug('tmpidlist: ' + str(tmpidlist))

            searched = pulldetails(None, 'import', offset=0, comicidlist=tmpidlist)

            if searched is None:
                break
            else:
                tGIL = GetImportList(searched)
                import_list += tGIL

            #advance to the next batch, capping at the end of the list so the
            #final (partial) batch doesn't index past it.
            endcnt = min(endcnt + 100, len(comicidlist))
            id_count += 100

        return import_list
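
#usage sketch (hypothetical id; assumes Mylar's config is loaded):
#   result = getComic('18166', 'issue')
#   if result is not False:
#       for iss in result['issuechoice']:
#           print iss['Issue_Number'], iss['Issue_Date']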

def GetComicInfo(comicid, dom, safechk=None):
    if safechk is None:
        #safety check when checking comicvine. If it times out, increment the chk on retry attempts up until 5 tries then abort.
        safechk = 1
    elif safechk > 4:
        logger.error('Unable to add / refresh the series due to inability to retrieve data from ComicVine. You might want to try again a bit later and/or make sure ComicVine is up.')
        return
    #comicvine isn't as up-to-date with issue counts..
    #so this can get really buggered, really fast.
    tracks = dom.getElementsByTagName('issue')
    try:
        cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
    except:
        cntit = len(tracks)
    trackcnt = len(tracks)
    logger.fdebug("number of issues I counted: " + str(trackcnt))
    logger.fdebug("number of issues CV says it has: " + str(cntit))
    #if the two don't match, use trackcnt as count_of_issues might not be up-to-date for some reason
    if int(trackcnt) != int(cntit):
        cntit = trackcnt
        vari = "yes"
    else:
        vari = "no"
    logger.fdebug("vari is set to: " + str(vari))
    #if str(trackcnt) != str(int(cntit)+2):
    #    cntit = int(cntit) + 1
    comic = {}
    comicchoice = []
    cntit = int(cntit)
    #retrieve the first xml tag (<tag>data</tag>)
    #that the parser finds with name tagName:
    #to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName
    #where [0] denotes the number of the name field(s)
    #where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue
    try:
        names = len(dom.getElementsByTagName('name'))
        n = 0
        while (n < names):
            if dom.getElementsByTagName('name')[n].parentNode.nodeName == 'results':
                try:
                    comic['ComicName'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                    comic['ComicName'] = comic['ComicName'].rstrip()
                except:
                    logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.')
                    return

            elif dom.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                try:
                    comic['ComicPublisher'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
                except:
                    comic['ComicPublisher'] = "Unknown"

            n += 1
    except:
        logger.warn('Something went wrong retrieving from ComicVine. Ensure your API key is up-to-date and that comicvine is accessible.')
        return

    try:
        comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText
    except:
        comic['ComicYear'] = '0000'

    try:
        comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[trackcnt].firstChild.wholeText
    except:
        #this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for a bit then retry.
        logger.warn('Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.')
        time.sleep(10)
        safechk += 1
        #propagate the retry's result - previously the recursive call's return value was dropped.
        return GetComicInfo(comicid, dom, safechk)

    desdeck = 0
    #the description field actually holds the Volume# - so let's grab it
    try:
        descchunk = dom.getElementsByTagName('description')[0].firstChild.wholeText
        comic_desc = drophtml(descchunk)
        desdeck += 1
    except:
        comic_desc = 'None'

    #sometimes the deck has volume labels
    try:
        deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText
        comic_deck = deckchunk
        desdeck += 1
    except:
        comic_deck = 'None'

    #comic['ComicDescription'] = comic_desc

    try:
        comic['Aliases'] = dom.getElementsByTagName('aliases')[0].firstChild.wholeText
        #logger.fdebug('Aliases: ' + str(aliases))
    except:
        comic['Aliases'] = 'None'

    comic['ComicVersion'] = 'noversion'
    #logger.info('comic_desc:' + comic_desc)
    #logger.info('comic_deck:' + comic_deck)
    #logger.info('desdeck: ' + str(desdeck))
    while (desdeck > 0):
        if desdeck == 1:
            if comic_desc == 'None':
                comicDes = comic_deck[:30]
            else:
                #extract the first 60 characters
                comicDes = comic_desc[:60].replace('New 52', '')
        elif desdeck == 2:
            #extract the characters from the deck
            comicDes = comic_deck[:30].replace('New 52', '')
        else:
            break

        i = 0
        while (i < 2):
            if 'volume' in comicDes.lower():
                #found volume - let's grab it.
                v_find = comicDes.lower().find('volume')
                #grab the next 15 chars (6 for 'volume' + 1 for space + up to 8 for the actual vol #)
                #wide enough to allow for text numbering as well
                #sometimes it's 'volume 5' and occasionally it's 'fifth volume'.
                if i == 0:
                    vfind = comicDes[v_find:v_find + 15]    #if it's 'volume 5' format
                    basenums = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
                else:
                    vfind = comicDes[:v_find]    #if it's 'fifth volume' format
                    basenums = {'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
                    logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
                volconv = ''
                for nums in basenums:
                    if nums in vfind.lower():
                        sconv = basenums[nums]
                        vfind = re.sub(nums, sconv, vfind.lower())
                        break
                #logger.info('volconv: ' + str(volconv))

                #now we attempt to find the character position after the word 'volume'
                if i == 0:
                    volthis = vfind.lower().find('volume')
                    volthis = volthis + 6    #add the length of the word itself so we land on the subsequent digit
                    vfind = vfind[volthis:volthis + 4]    #grab the next 4 characters
                elif i == 1:
                    volthis = vfind.lower().find('volume')
                    vfind = vfind[volthis - 4:volthis]    #grab the 4 characters preceding 'volume'

                if '(' in vfind:
                    #bracket detected in versioning
                    vfindit = re.findall('[^()]+', vfind)
                    vfind = vfindit[0]
                vf = re.findall('[^<>]+', vfind)
                try:
                    ledigit = re.sub("[^0-9]", "", vf[0])
                    if ledigit != '':
                        comic['ComicVersion'] = ledigit
                        logger.fdebug("Volume information found! Adding to series record : volume " + comic['ComicVersion'])
                        break
                except:
                    pass

                i += 1
            else:
                i += 1

        if comic['ComicVersion'] == 'noversion':
            logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
            desdeck -= 1
        else:
            break

    if vari == "yes":
        comic['ComicIssues'] = str(cntit)
    else:
        comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText

    comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText
    comic['ComicImageALT'] = dom.getElementsByTagName('small_url')[0].firstChild.wholeText

    comic['FirstIssueID'] = dom.getElementsByTagName('id')[0].firstChild.wholeText

    #print ("firstIss:" + str(comic['FirstIssueID']))
    #comicchoice.append({
    #    'ComicName': comic['ComicName'],
    #    'ComicYear': comic['ComicYear'],
    #    'Comicid': comicid,
    #    'ComicURL': comic['ComicURL'],
    #    'ComicIssues': comic['ComicIssues'],
    #    'ComicImage': comic['ComicImage'],
    #    'ComicVolume': ParseVol,
    #    'ComicPublisher': comic['ComicPublisher']
    #    })
    #comic['comicchoice'] = comicchoice
    return comic
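
#worked example (illustrative) of the version sniffing above: a description
#beginning 'Volume Two of the...' is sliced to 'volume two of t', 'two' is
#swapped for '2' via basenums (assuming it matches before the single-letter
#roman-numeral keys), and the characters after 'volume' are reduced to
#digits, yielding comic['ComicVersion'] = '2'.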

def GetIssuesInfo(comicid, dom, arcid=None):
    subtracks = dom.getElementsByTagName('issue')
    if not mylar.CV_ONLY:
        cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
        logger.fdebug("issues I've counted: " + str(len(subtracks)))
        logger.fdebug("issues CV says it has: " + str(int(cntiss)))

        if int(len(subtracks)) != int(cntiss):
            logger.fdebug("CV's count is wrong, I counted differently...going with my count for physicals: " + str(len(subtracks)))
            cntiss = len(subtracks)    #assume count_of_issues is wrong, go with the ACTUAL physical api count
        cntiss = int(cntiss)
        n = cntiss - 1
    else:
        n = int(len(subtracks))
    tempissue = {}
    issue = {}    #used by the legacy (non CV_ONLY) path below; previously referenced without being defined
    issuech = []
    firstdate = '2099-00-00'
    for subtrack in subtracks:
        if not mylar.CV_ONLY:
            if (dom.getElementsByTagName('name')[n].firstChild) is not None:
                issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
            else:
                issue['Issue_Name'] = 'None'

            issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
            issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText

            issuech.append({
                'Issue_ID': issue['Issue_ID'],
                'Issue_Number': issue['Issue_Number'],
                'Issue_Name': issue['Issue_Name']
                })
        else:
            try:
                totnames = len(subtrack.getElementsByTagName('name'))
                tot = 0
                while (tot < totnames):
                    if subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume':
                        tempissue['ComicName'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
                    elif subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue':
                        try:
                            tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
                        except:
                            tempissue['Issue_Name'] = None
                    tot += 1
            except:
                tempissue['ComicName'] = 'None'

            try:
                totids = len(subtrack.getElementsByTagName('id'))
                idt = 0
                while (idt < totids):
                    if subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                        tempissue['Comic_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
                    elif subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue':
                        tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
                    idt += 1
            except:
                #id lookup failed - default the ids so the appends below don't KeyError
                tempissue['Comic_ID'] = 'None'
                tempissue['Issue_ID'] = 'None'

            try:
                tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText
            except:
                tempissue['CoverDate'] = '0000-00-00'
            try:
                tempissue['StoreDate'] = subtrack.getElementsByTagName('store_date')[0].firstChild.wholeText
            except:
                tempissue['StoreDate'] = '0000-00-00'
            try:
                tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText
            except:
                logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')

            if arcid is None:
                issuech.append({
                    'Comic_ID': comicid,
                    'Issue_ID': tempissue['Issue_ID'],
                    'Issue_Number': tempissue['Issue_Number'],
                    'Issue_Date': tempissue['CoverDate'],
                    'Store_Date': tempissue['StoreDate'],
                    'Issue_Name': tempissue['Issue_Name']
                    })
            else:
                issuech.append({
                    'ArcID': arcid,
                    'ComicName': tempissue['ComicName'],
                    'ComicID': tempissue['Comic_ID'],
                    'IssueID': tempissue['Issue_ID'],
                    'Issue_Number': tempissue['Issue_Number'],
                    'Issue_Date': tempissue['CoverDate'],
                    'Store_Date': tempissue['StoreDate'],
                    'Issue_Name': tempissue['Issue_Name']
                    })

            if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00':
                firstdate = tempissue['CoverDate']
        n -= 1

    #issue['firstdate'] = firstdate
    return issuech, firstdate
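
#shape sketch of the CV_ONLY return value (values are illustrative):
#   ([{'Comic_ID': '18166', 'Issue_ID': '123456', 'Issue_Number': '1',
#      'Issue_Date': '2011-11-01', 'Store_Date': '2011-09-21',
#      'Issue_Name': 'Pilot'}, ...], '2011-11-01')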

def GetFirstIssue(issueid, dom):
    #if the Series Year doesn't exist, get the first issue and take the date from that
    try:
        first_year = dom.getElementsByTagName('cover_date')[0].firstChild.wholeText
    except:
        first_year = '0000'
        return first_year

    the_year = first_year[:4]
    the_month = first_year[5:7]
    the_date = the_year + '-' + the_month    #currently unused, kept for reference
    return the_year

def GetSeriesYears(dom):
    #used by the 'add a story arc' option to individually populate the Series Year for each series within the given arc.
    #series year is required for a lot of functionality.
    series = dom.getElementsByTagName('volume')
    tempseries = {}
    serieslist = []
    for dm in series:
        try:
            totids = len(dm.getElementsByTagName('id'))
            idc = 0
            while (idc < totids):
                if dm.getElementsByTagName('id')[idc].parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = dm.getElementsByTagName('id')[idc].firstChild.wholeText
                idc += 1
        except:
            logger.warn('There was a problem retrieving a comicid for a series within the arc. This will most likely have to be corrected manually.')
            tempseries['ComicID'] = 'None'

        tempseries['Series'] = 'None'
        tempseries['Publisher'] = 'None'
        try:
            totnames = len(dm.getElementsByTagName('name'))
            namesc = 0
            while (namesc < totnames):
                if dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'volume':
                    tempseries['Series'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                elif dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'publisher':
                    tempseries['Publisher'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
                namesc += 1
        except:
            logger.warn('There was a problem retrieving a Series Name or Publisher for a series within the arc. This will have to be corrected manually.')

        try:
            tempseries['SeriesYear'] = dm.getElementsByTagName('start_year')[0].firstChild.wholeText
        except:
            logger.warn('There was a problem retrieving the start year for a particular series within the story arc.')
            tempseries['SeriesYear'] = '0000'

        serieslist.append({"ComicID": tempseries['ComicID'],
                           "ComicName": tempseries['Series'],
                           "SeriesYear": tempseries['SeriesYear'],
                           "Publisher": tempseries['Publisher']})

    return serieslist
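
#shape sketch of the return value (values are illustrative):
#   [{'ComicID': '18166', 'ComicName': 'Batman', 'SeriesYear': '2011',
#     'Publisher': 'DC Comics'}, ...]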

def GetImportList(results):
    logger.fdebug('Parsing the CV import result set.')
    importlist = results.getElementsByTagName('issue')
    serieslist = []
    importids = {}
    tempseries = {}
    for implist in importlist:
        try:
            totids = len(implist.getElementsByTagName('id'))
            idt = 0
            while (idt < totids):
                if implist.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                    tempseries['ComicID'] = implist.getElementsByTagName('id')[idt].firstChild.wholeText
                elif implist.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue':
                    tempseries['IssueID'] = implist.getElementsByTagName('id')[idt].firstChild.wholeText
                idt += 1
        except:
            tempseries['ComicID'] = None

        try:
            totnames = len(implist.getElementsByTagName('name'))
            tot = 0
            while (tot < totnames):
                if implist.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume':
                    tempseries['ComicName'] = implist.getElementsByTagName('name')[tot].firstChild.wholeText
                elif implist.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue':
                    try:
                        tempseries['Issue_Name'] = implist.getElementsByTagName('name')[tot].firstChild.wholeText
                    except:
                        tempseries['Issue_Name'] = None
                tot += 1
        except:
            tempseries['ComicName'] = 'None'

        logger.fdebug('tempseries:' + str(tempseries))
        serieslist.append({"ComicID": tempseries['ComicID'],
                           "IssueID": tempseries['IssueID'],
                           "ComicName": tempseries['ComicName'],
                           "Issue_Name": tempseries['Issue_Name']})

    return serieslist
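
#each entry carries the CV ids needed to match a metatagged file back to its
#series, e.g. (illustrative): {'ComicID': '18166', 'IssueID': '123456',
#'ComicName': 'Batman', 'Issue_Name': 'Knight Terrors'}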

def drophtml(html):
    from bs4 import BeautifulSoup
    #specify the parser explicitly so bs4 doesn't warn / guess
    soup = BeautifulSoup(html, 'html.parser')

    text_parts = soup.findAll(text=True)
    #print ''.join(text_parts)
    return ''.join(text_parts)
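
#minimal standalone sketch (not part of Mylar's runtime): drophtml() is the
#only routine here that needs neither Mylar's config nor network access, so
#it can be smoke-tested directly from within the source tree.
if __name__ == '__main__':
    sample = '<p>This is <i>Volume Two</i> of the series.</p>'
    print drophtml(sample)    #-> 'This is Volume Two of the series.'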