mirror of https://github.com/evilhero/mylar
FIX: Search query is now more accurate for multi-term queries
This commit is contained in:
parent
95b39ca1ed
commit
dccbdcdba8
53
mylar/mb.py
53
mylar/mb.py
|
@ -44,20 +44,17 @@ if platform.python_version() == '2.7.6':
|
|||
httplib.HTTPConnection._http_vsn = 10
|
||||
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
|
||||
|
||||
def pullsearch(comicapi, comicquery, offset, type, annuals=False):
|
||||
u_comicquery = urllib.quote(comicquery.encode('utf-8').strip())
|
||||
u_comicquery = u_comicquery.replace(" ", "%20")
|
||||
u_comicquery = u_comicquery.replace('-', '%2D')
|
||||
#logger.info('comicquery: %s' % comicquery)
|
||||
if annuals is True:
|
||||
PULLURL = mylar.CVURL + 'search?api_key=' + str(comicapi) + '&resources=' + str(type) + '&query=' + u_comicquery + '&field_list=id,name,start_year,first_issue,site_detail_url,count_of_issues,image,publisher,deck,description,last_issue&format=xml&limit=100&page=' + str(offset)
|
||||
def pullsearch(comicapi, comicquery, offset, type):
|
||||
|
||||
else:
|
||||
# 02/22/2014 use the volume filter label to get the right results.
|
||||
# add the 's' to the end of type to pluralize the caption (it's needed)
|
||||
if type == 'story_arc':
|
||||
u_comicquery = re.sub("%20AND%20", "%20", u_comicquery)
|
||||
PULLURL = mylar.CVURL + str(type) + 's?api_key=' + str(comicapi) + '&filter=name:' + u_comicquery + '&field_list=id,name,start_year,site_detail_url,count_of_issues,image,publisher,deck,description,first_issue,last_issue&format=xml&offset=' + str(offset) # 2012/22/02 - CVAPI flipped back to offset instead of page
|
||||
cnt = 1
|
||||
for x in comicquery:
|
||||
if cnt == 1:
|
||||
filterline = '%s' % x
|
||||
else:
|
||||
filterline+= ',name:%s' % x
|
||||
cnt+=1
|
||||
|
||||
PULLURL = mylar.CVURL + str(type) + 's?api_key=' + str(comicapi) + '&filter=name:' + filterline + '&field_list=id,name,start_year,site_detail_url,count_of_issues,image,publisher,deck,description,first_issue,last_issue&format=xml&offset=' + str(offset) # 2012/22/02 - CVAPI flipped back to offset instead of page
|
||||
|
||||
#all these imports are standard on most modern python implementations
|
||||
#logger.info('MB.PULLURL:' + PULLURL)
|
||||
|
@ -88,18 +85,14 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
|||
comiclist = []
|
||||
arcinfolist = []
|
||||
|
||||
#if type == 'story_arc':
|
||||
# chars = set('!?*&')
|
||||
#else:
|
||||
# chars = set('!?*&-')
|
||||
#if any((c in chars) for c in name) or 'annual' in name:
|
||||
# name = '"' +name +'"'
|
||||
annuals = False
|
||||
if 'annual' in name:
|
||||
name = '"' + name +'"'
|
||||
annuals = True
|
||||
commons = [' and ', ' the ']
|
||||
for x in commons:
|
||||
if x in name.lower():
|
||||
name = re.sub(x, ' ', name.lower()).strip()
|
||||
|
||||
pattern = re.compile(ur'\w+', re.UNICODE)
|
||||
name = pattern.findall(name)
|
||||
|
||||
#print ("limityear: " + str(limityear))
|
||||
if limityear is None: limityear = 'None'
|
||||
|
||||
comicquery = name
|
||||
|
@ -114,7 +107,7 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
|||
type = 'volume'
|
||||
|
||||
#let's find out how many results we get from the query...
|
||||
searched = pullsearch(comicapi, comicquery, 0, type, annuals)
|
||||
searched = pullsearch(comicapi, comicquery, 0, type)
|
||||
if searched is None:
|
||||
return False
|
||||
totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
|
||||
|
@ -128,15 +121,9 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
|||
while (countResults < int(totalResults)):
|
||||
#logger.fdebug("querying " + str(countResults))
|
||||
if countResults > 0:
|
||||
#2012/22/02 - CV API flipped back to offset usage instead of page
|
||||
if annuals is True:
|
||||
# search uses page for offset
|
||||
offsetcount = (countResults /100) + 1
|
||||
else:
|
||||
# filter uses offset
|
||||
offsetcount = countResults
|
||||
offsetcount = countResults
|
||||
|
||||
searched = pullsearch(comicapi, comicquery, offsetcount, type, annuals)
|
||||
searched = pullsearch(comicapi, comicquery, offsetcount, type)
|
||||
comicResults = searched.getElementsByTagName(type)
|
||||
body = ''
|
||||
n = 0
|
||||
|
|
|
@ -1404,7 +1404,7 @@ def future_check():
|
|||
if not theissdate.startswith('20'):
|
||||
theissdate = ser['IssueDate'][:4]
|
||||
logger.info('looking for new data for ' + ser['ComicName'] + '[#' + str(ser['IssueNumber']) + '] (' + str(theissdate) + ')')
|
||||
searchresults, explicit = mb.findComic(ser['ComicName'], mode='pullseries', issue=ser['IssueNumber'], limityear=theissdate, explicit='all')
|
||||
searchresults = mb.findComic(ser['ComicName'], mode='pullseries', issue=ser['IssueNumber'], limityear=theissdate)
|
||||
if len(searchresults) > 0:
|
||||
if len(searchresults) > 1:
|
||||
logger.info('More than one result returned - this may have to be a manual add, but I\'m going to try to figure it out myself first.')
|
||||
|
|
Loading…
Reference in New Issue