# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement

import re
import time
import threading
import platform
import urllib, urllib2
import httplib
from xml.dom.minidom import parseString, Element

import mylar
from mylar import logger, db, cv
from mylar.helpers import multikeysort, replace_all, cleanName, cvapi_check, listLibrary

mb_lock = threading.Lock()
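
# Monkey-patch httplib.HTTPResponse.read so that an IncompleteRead during a
# response read returns whatever partial data was received instead of raising.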
def patch_http_response_read(func):
    def inner(*args):
        try:
            return func(*args)
        except httplib.IncompleteRead, e:
            return e.partial
    return inner

httplib.HTTPResponse.read = patch_http_response_read(httplib.HTTPResponse.read)
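
# Workaround for Python 2.7.6: force HTTP/1.0 on httplib connections, which
# prevents chunked transfer-encoded responses (chunked transfer is an
# HTTP/1.1 feature).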
if platform.python_version() == '2.7.6':
    httplib.HTTPConnection._http_vsn = 10
    httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'

def pullsearch(comicapi, comicquery, offset, explicit, type):
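    """Query the ComicVine search API and return the parsed XML response as a
    minidom DOM, or None when the HTTP request fails.

    'offset' is a page number when explicit is 'all' or 'loose', but a result
    offset when the filtered endpoint is used.
    """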
    u_comicquery = urllib.quote(comicquery.encode('utf-8').strip())
    u_comicquery = u_comicquery.replace(" ", "%20")

    if explicit == 'all' or explicit == 'loose':
        PULLURL = mylar.CVURL + 'search?api_key=' + str(comicapi) + '&resources=' + str(type) + '&query=' + u_comicquery + '&field_list=id,name,start_year,first_issue,site_detail_url,count_of_issues,image,publisher,deck,description&format=xml&page=' + str(offset)
    else:
        # 02/22/2014 - use the volume filter label to get the right results.
        # Add the 's' to the end of type to pluralize the caption (it's needed).
        if type == 'story_arc':
            u_comicquery = re.sub("%20AND%20", "%20", u_comicquery)
        PULLURL = mylar.CVURL + str(type) + 's?api_key=' + str(comicapi) + '&filter=name:' + u_comicquery + '&field_list=id,name,start_year,site_detail_url,count_of_issues,image,publisher,deck,description&format=xml&offset=' + str(offset)  # 02/22/2014 - CV API flipped back to offset instead of page

    # CV API rate-limit check.
    #logger.info('PULLURL:' + PULLURL)
    if mylar.CVAPI_COUNT == 0 or mylar.CVAPI_COUNT >= mylar.CVAPI_MAX:
        cvapi_check()

    # Download the search results.
    try:
        file = urllib2.urlopen(PULLURL)
    except urllib2.HTTPError, err:
        logger.error('err : ' + str(err))
        logger.error("There was a major problem retrieving data from ComicVine - on their end. You'll most likely have to try again later.")
        return

    # Increment the CV API counter.
    mylar.CVAPI_COUNT += 1

    # Read, close, and parse the XML response.
    data = file.read()
    file.close()
    dom = parseString(data)
    return dom

def findComic(name, mode, issue, limityear=None, explicit=None, type=None):
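    """Search ComicVine for volumes (or story arcs) matching 'name'.

    Returns a (comiclist, explicit) tuple: the list of result dicts and the
    search mode that was actually used. Returns False when the initial search
    fails or yields no results.
    """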
    #with mb_lock:
    comiclist = []
    comicResults = None
    comicLibrary = listLibrary()

    # Quote the name if it contains characters the search would misinterpret.
    chars = set('!?*')
    if any((c in chars) for c in name):
        name = '"' + name + '"'

    #print ("limityear: " + str(limityear))
    if limityear is None:
        limityear = 'None'

    comicquery = name
    #comicquery=name.replace(" ", "%20")

    if explicit is None:
        #logger.fdebug('explicit is None. Setting to default mode of ALL search words.')
        #comicquery=name.replace(" ", " AND ")
        explicit = 'all'

    if explicit == 'loose':
        logger.fdebug('Changing to loose mode - this will match ANY of the search words')
        comicquery = name.replace(" ", " OR ")
    elif explicit == 'explicit':
        logger.fdebug('Changing to explicit mode - this will match explicitly on the EXACT words')
        comicquery = name.replace(" ", " AND ")
    else:
        logger.fdebug('Default search mode - this will match on ALL search words')
        comicquery = name.replace(" ", " AND ")
        explicit = 'all'

    if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
        logger.warn('You have not specified your own ComicVine API key - a lot of things will be limited. Get your own @ http://api.comicvine.com.')
        comicapi = mylar.DEFAULT_CVAPI
    else:
        comicapi = mylar.COMICVINE_API

    if type is None:
        type = 'volume'

    # Find out how many results the query returns in total.
    searched = pullsearch(comicapi, comicquery, 0, explicit, type)
    if searched is None:
        return False
    totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
    logger.fdebug("there are " + str(totalResults) + " search results...")
    if not totalResults:
        return False

    countResults = 0
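
    # CV search results are paginated at 100 per request, so keep querying
    # until every result has been examined.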
    while (countResults < int(totalResults)):
        #logger.fdebug("querying " + str(countResults))
        if countResults > 0:
            # 02/22/2014 - CV API flipped back to offset usage instead of page.
            if explicit == 'all' or explicit == 'loose':
                # all / loose mode uses page for the offset.
                offsetcount = (countResults / 100) + 1
            else:
                # explicit mode uses a result offset.
                offsetcount = countResults
            searched = pullsearch(comicapi, comicquery, offsetcount, explicit, type)
            if searched is None:
                # The page retrieval failed; return what has been gathered so far.
                break

        comicResults = searched.getElementsByTagName(type)
        body = ''
        n = 0
        if not comicResults:
            break
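
        # Each result is either a story arc or a volume; the two use
        # different XML layouts, so they are handled in separate branches.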
        for result in comicResults:
            # Retrieve the first xml tag (<tag>data</tag>)
            # that the parser finds with the matching tagName.
            arclist = []
            if type == 'story_arc':
                # Call cv.py here to find out the issue count in the story arc.
                try:
                    logger.fdebug('story_arc ascension')
                    names = len(result.getElementsByTagName('name'))
                    n = 0
                    logger.fdebug('length: ' + str(names))
                    xmlpub = None  # set this in case the publisher field isn't populated in the xml
                    while (n < names):
                        logger.fdebug(result.getElementsByTagName('name')[n].parentNode.nodeName)
                        if result.getElementsByTagName('name')[n].parentNode.nodeName == 'story_arc':
                            logger.fdebug('yes')
                            try:
                                xmlTag = result.getElementsByTagName('name')[n].firstChild.wholeText
                                xmlTag = xmlTag.rstrip()
                                logger.fdebug('name: ' + str(xmlTag))
                            except:
                                logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                                return
                        elif result.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
                            logger.fdebug('publisher check.')
                            xmlpub = result.getElementsByTagName('name')[n].firstChild.wholeText
                        n += 1
                except:
                    logger.warn('Error retrieving story arc search results.')
                    return

                siteurl = len(result.getElementsByTagName('site_detail_url'))
                s = 0
                logger.fdebug('length: ' + str(siteurl))
                xmlurl = None
                while (s < siteurl):
                    logger.fdebug(result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName)
                    if result.getElementsByTagName('site_detail_url')[s].parentNode.nodeName == 'story_arc':
                        try:
                            xmlurl = result.getElementsByTagName('site_detail_url')[s].firstChild.wholeText
                        except:
                            logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible.')
                            return
                    s += 1

                xmlid = result.getElementsByTagName('id')[0].firstChild.wholeText

                if xmlid is not None:
                    # Respawn to the exact id for the story arc and count the # of issues present.
                    ARCPULL_URL = mylar.CVURL + 'story_arc/4045-' + str(xmlid) + '/?api_key=' + str(comicapi) + '&field_list=issues,name,first_appeared_in_issue,deck,image&format=xml&offset=0'
                    logger.fdebug('arcpull_url:' + str(ARCPULL_URL))
                    if mylar.CVAPI_COUNT == 0 or mylar.CVAPI_COUNT >= mylar.CVAPI_MAX:
                        cvapi_check()
                    try:
                        file = urllib2.urlopen(ARCPULL_URL)
                    except urllib2.HTTPError, err:
                        logger.error('err : ' + str(err))
                        logger.error('There was a major problem retrieving data from ComicVine - on their end.')
                        return
                    mylar.CVAPI_COUNT += 1
                    arcdata = file.read()
                    file.close()
                    arcdom = parseString(arcdata)

                    # Build a pipe-delimited list of every issue id in the arc.
                    try:
                        logger.fdebug('story_arc ascension')
                        issuecount = len(arcdom.getElementsByTagName('issue'))
                        issuedom = arcdom.getElementsByTagName('issue')
                        isc = 0
                        arclist = ''
                        for isd in issuedom:
                            zeline = isd.getElementsByTagName('id')
                            isdlen = len(zeline)
                            isb = 0
                            while (isb < isdlen):
                                if isc == 0:
                                    arclist = str(zeline[isb].firstChild.wholeText).strip()
                                else:
                                    arclist += '|' + str(zeline[isb].firstChild.wholeText).strip()
                                isb += 1
                            isc += 1
                    except:
                        logger.fdebug('Unable to retrieve issue count - nullifying the value.')
                        issuecount = 0

                    # Determine the arc's start year from its first appearance.
                    try:
                        firstid = None
                        arcyear = None
                        fid = len(arcdom.getElementsByTagName('id'))
                        fi = 0
                        while (fi < fid):
                            if arcdom.getElementsByTagName('id')[fi].parentNode.nodeName == 'first_appeared_in_issue':
                                if not arcdom.getElementsByTagName('id')[fi].firstChild.wholeText == xmlid:
                                    logger.fdebug('hit it.')
                                    firstid = arcdom.getElementsByTagName('id')[fi].firstChild.wholeText
                                    break
                            fi += 1
                        logger.fdebug('firstid: ' + str(firstid))
                        if firstid is not None:
                            firstdom = cv.pulldetails(comicid=None, type='firstissue', issueid=firstid)
                            logger.fdebug('success')
                            arcyear = cv.GetFirstIssue(firstid, firstdom)
                    except:
                        logger.fdebug('Unable to retrieve first issue details. Not calculating at this time.')

                    if (arcdom.getElementsByTagName('image')[0].childNodes[0].nodeValue) is None:
                        xmlimage = arcdom.getElementsByTagName('super_url')[0].firstChild.wholeText
                    else:
                        xmlimage = "cache/blankcover.jpg"

                    try:
                        xmldesc = arcdom.getElementsByTagName('desc')[0].firstChild.wholeText
                    except:
                        xmldesc = "None"

                    try:
                        xmldeck = arcdom.getElementsByTagName('deck')[0].firstChild.wholeText
                    except:
                        xmldeck = "None"

                    if xmlid in comicLibrary:
                        haveit = comicLibrary[xmlid]
                    else:
                        haveit = "No"

                    comiclist.append({
                        'name': xmlTag,
                        'comicyear': arcyear,
                        'comicid': xmlid,
                        'url': xmlurl,
                        'issues': issuecount,
                        'comicimage': xmlimage,
                        'publisher': xmlpub,
                        'description': xmldesc,
                        'deck': xmldeck,
                        'arclist': arclist,
                        'haveit': haveit
                    })
                    logger.fdebug('IssueIDs that are a part of ' + xmlTag + ' : ' + str(arclist))
            else:
                xmlcnt = result.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
                # Here we can determine what called us, and either start gathering all issues or just limited ones.
                if issue is not None and str(issue).isdigit():
                    # This gets buggered up with NEW/ONGOING series because the db hasn't been updated
                    # to reflect the proper count. Drop it by 1 to be sure.
                    limiter = int(issue) - 1
                else:
                    limiter = 0

                # Get the first issue # (for auto-magick calcs).
                try:
                    xmlfirst = result.getElementsByTagName('issue_number')[0].firstChild.wholeText
                    if '\xbd' in xmlfirst:
                        xmlfirst = "1"  # if the first issue is 1/2, just assume 1 for logistics
                except:
                    xmlfirst = '1'

                #logger.info('There are : ' + str(xmlcnt) + ' issues in this series.')
                #logger.info('The first issue started at # ' + str(xmlfirst))
                cnt_numerical = int(xmlcnt) + int(xmlfirst)  # (# of issues + start of first issue = numerical range)
                #logger.info('The maximum issue number should be roughly # ' + str(cnt_numerical))
                #logger.info('The limiter (issue max that we know of) is # ' + str(limiter))
                if cnt_numerical >= limiter:
                    cnl = len(result.getElementsByTagName('name'))
                    cl = 0
                    xmlTag = 'None'
                    xmlimage = "cache/blankcover.jpg"
                    while (cl < cnl):
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'volume':
                            xmlTag = result.getElementsByTagName('name')[cl].firstChild.wholeText
                            #break
                        if result.getElementsByTagName('name')[cl].parentNode.nodeName == 'image':
                            xmlimage = result.getElementsByTagName('super_url')[0].firstChild.wholeText
                        cl += 1

                    if (result.getElementsByTagName('start_year')[0].firstChild) is not None:
                        xmlYr = result.getElementsByTagName('start_year')[0].firstChild.wholeText
                    else:
                        xmlYr = "0000"
                    #logger.info('name:' + str(xmlTag) + ' -- ' + str(xmlYr))
                    if xmlYr in limityear or limityear == 'None':
                        xmlurl = result.getElementsByTagName('site_detail_url')[0].firstChild.wholeText
                        idl = len(result.getElementsByTagName('id'))
                        idt = 0
                        xmlid = None
                        while (idt < idl):
                            if result.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
                                xmlid = result.getElementsByTagName('id')[idt].firstChild.wholeText
                                break
                            idt += 1

                        if xmlid is None:
                            logger.error('Unable to figure out the comicid - skipping this : ' + str(xmlurl))
                            continue
                        #logger.info('xmlid: ' + str(xmlid))

                        publishers = result.getElementsByTagName('publisher')
                        if len(publishers) > 0:
                            pubnames = publishers[0].getElementsByTagName('name')
                            if len(pubnames) > 0:
                                xmlpub = pubnames[0].firstChild.wholeText
                            else:
                                xmlpub = "Unknown"
                        else:
                            xmlpub = "Unknown"

                        try:
                            xmldesc = result.getElementsByTagName('description')[0].firstChild.wholeText
                        except:
                            xmldesc = "None"

                        # The deck is needed to display a brief synopsis for each series on the search results page.
                        try:
                            xmldeck = result.getElementsByTagName('deck')[0].firstChild.wholeText
                        except:
                            xmldeck = "None"

                        if xmlid in comicLibrary:
                            haveit = comicLibrary[xmlid]
                        else:
                            haveit = "No"

                        comiclist.append({
                            'name': xmlTag,
                            'comicyear': xmlYr,
                            'comicid': xmlid,
                            'url': xmlurl,
                            'issues': xmlcnt,
                            'comicimage': xmlimage,
                            'publisher': xmlpub,
                            'description': xmldesc,
                            'deck': xmldeck,
                            'haveit': haveit
                        })
                        #logger.fdebug('year: ' + str(xmlYr) + ' - constraint met: ' + str(xmlTag) + '[' + str(xmlYr) + '] --- 4050-' + str(xmlid))
                    else:
                        #logger.fdebug('year: ' + str(xmlYr) + ' - constraint not met. Has to be within ' + str(limityear))
                        pass
            n += 1

        # Search results are limited to 100 per page by pagination; account for this.
        countResults = countResults + 100

    return comiclist, explicit