mirror of https://github.com/evilhero/mylar
FIX: Fixed search query to be more accurate on multi-term queries
This commit is contained in:
parent
95b39ca1ed
commit
dccbdcdba8
53
mylar/mb.py
53
mylar/mb.py
|
@ -44,20 +44,17 @@ if platform.python_version() == '2.7.6':
|
||||||
httplib.HTTPConnection._http_vsn = 10
|
httplib.HTTPConnection._http_vsn = 10
|
||||||
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
|
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
|
||||||
|
|
||||||
def pullsearch(comicapi, comicquery, offset, type, annuals=False):
|
def pullsearch(comicapi, comicquery, offset, type):
|
||||||
u_comicquery = urllib.quote(comicquery.encode('utf-8').strip())
|
|
||||||
u_comicquery = u_comicquery.replace(" ", "%20")
|
|
||||||
u_comicquery = u_comicquery.replace('-', '%2D')
|
|
||||||
#logger.info('comicquery: %s' % comicquery)
|
|
||||||
if annuals is True:
|
|
||||||
PULLURL = mylar.CVURL + 'search?api_key=' + str(comicapi) + '&resources=' + str(type) + '&query=' + u_comicquery + '&field_list=id,name,start_year,first_issue,site_detail_url,count_of_issues,image,publisher,deck,description,last_issue&format=xml&limit=100&page=' + str(offset)
|
|
||||||
|
|
||||||
else:
|
cnt = 1
|
||||||
# 02/22/2014 use the volume filter label to get the right results.
|
for x in comicquery:
|
||||||
# add the 's' to the end of type to pluralize the caption (it's needed)
|
if cnt == 1:
|
||||||
if type == 'story_arc':
|
filterline = '%s' % x
|
||||||
u_comicquery = re.sub("%20AND%20", "%20", u_comicquery)
|
else:
|
||||||
PULLURL = mylar.CVURL + str(type) + 's?api_key=' + str(comicapi) + '&filter=name:' + u_comicquery + '&field_list=id,name,start_year,site_detail_url,count_of_issues,image,publisher,deck,description,first_issue,last_issue&format=xml&offset=' + str(offset) # 2012/22/02 - CVAPI flipped back to offset instead of page
|
filterline+= ',name:%s' % x
|
||||||
|
cnt+=1
|
||||||
|
|
||||||
|
PULLURL = mylar.CVURL + str(type) + 's?api_key=' + str(comicapi) + '&filter=name:' + filterline + '&field_list=id,name,start_year,site_detail_url,count_of_issues,image,publisher,deck,description,first_issue,last_issue&format=xml&offset=' + str(offset) # 2012/22/02 - CVAPI flipped back to offset instead of page
|
||||||
|
|
||||||
#all these imports are standard on most modern python implementations
|
#all these imports are standard on most modern python implementations
|
||||||
#logger.info('MB.PULLURL:' + PULLURL)
|
#logger.info('MB.PULLURL:' + PULLURL)
|
||||||
|
@ -88,18 +85,14 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
||||||
comiclist = []
|
comiclist = []
|
||||||
arcinfolist = []
|
arcinfolist = []
|
||||||
|
|
||||||
#if type == 'story_arc':
|
commons = [' and ', ' the ']
|
||||||
# chars = set('!?*&')
|
for x in commons:
|
||||||
#else:
|
if x in name.lower():
|
||||||
# chars = set('!?*&-')
|
name = re.sub(x, ' ', name.lower()).strip()
|
||||||
#if any((c in chars) for c in name) or 'annual' in name:
|
|
||||||
# name = '"' +name +'"'
|
pattern = re.compile(ur'\w+', re.UNICODE)
|
||||||
annuals = False
|
name = pattern.findall(name)
|
||||||
if 'annual' in name:
|
|
||||||
name = '"' + name +'"'
|
|
||||||
annuals = True
|
|
||||||
|
|
||||||
#print ("limityear: " + str(limityear))
|
|
||||||
if limityear is None: limityear = 'None'
|
if limityear is None: limityear = 'None'
|
||||||
|
|
||||||
comicquery = name
|
comicquery = name
|
||||||
|
@ -114,7 +107,7 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
||||||
type = 'volume'
|
type = 'volume'
|
||||||
|
|
||||||
#let's find out how many results we get from the query...
|
#let's find out how many results we get from the query...
|
||||||
searched = pullsearch(comicapi, comicquery, 0, type, annuals)
|
searched = pullsearch(comicapi, comicquery, 0, type)
|
||||||
if searched is None:
|
if searched is None:
|
||||||
return False
|
return False
|
||||||
totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
|
totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
|
||||||
|
@ -128,15 +121,9 @@ def findComic(name, mode, issue, limityear=None, type=None):
|
||||||
while (countResults < int(totalResults)):
|
while (countResults < int(totalResults)):
|
||||||
#logger.fdebug("querying " + str(countResults))
|
#logger.fdebug("querying " + str(countResults))
|
||||||
if countResults > 0:
|
if countResults > 0:
|
||||||
#2012/22/02 - CV API flipped back to offset usage instead of page
|
offsetcount = countResults
|
||||||
if annuals is True:
|
|
||||||
# search uses page for offset
|
|
||||||
offsetcount = (countResults /100) + 1
|
|
||||||
else:
|
|
||||||
# filter uses offset
|
|
||||||
offsetcount = countResults
|
|
||||||
|
|
||||||
searched = pullsearch(comicapi, comicquery, offsetcount, type, annuals)
|
searched = pullsearch(comicapi, comicquery, offsetcount, type)
|
||||||
comicResults = searched.getElementsByTagName(type)
|
comicResults = searched.getElementsByTagName(type)
|
||||||
body = ''
|
body = ''
|
||||||
n = 0
|
n = 0
|
||||||
|
|
|
@ -1404,7 +1404,7 @@ def future_check():
|
||||||
if not theissdate.startswith('20'):
|
if not theissdate.startswith('20'):
|
||||||
theissdate = ser['IssueDate'][:4]
|
theissdate = ser['IssueDate'][:4]
|
||||||
logger.info('looking for new data for ' + ser['ComicName'] + '[#' + str(ser['IssueNumber']) + '] (' + str(theissdate) + ')')
|
logger.info('looking for new data for ' + ser['ComicName'] + '[#' + str(ser['IssueNumber']) + '] (' + str(theissdate) + ')')
|
||||||
searchresults, explicit = mb.findComic(ser['ComicName'], mode='pullseries', issue=ser['IssueNumber'], limityear=theissdate, explicit='all')
|
searchresults = mb.findComic(ser['ComicName'], mode='pullseries', issue=ser['IssueNumber'], limityear=theissdate)
|
||||||
if len(searchresults) > 0:
|
if len(searchresults) > 0:
|
||||||
if len(searchresults) > 1:
|
if len(searchresults) > 1:
|
||||||
logger.info('More than one result returned - this may have to be a manual add, but I\'m going to try to figure it out myself first.')
|
logger.info('More than one result returned - this may have to be a manual add, but I\'m going to try to figure it out myself first.')
|
||||||
|
|
Loading…
Reference in New Issue